| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| --- | --- | --- |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /include/llvm/CodeGen | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Diffstat (limited to 'include/llvm/CodeGen')
89 files changed, 11674 insertions, 1799 deletions
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 60bbc9aaa5bd4..b8944a668000d 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -43,11 +43,11 @@ class DIE; class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; +class GCStrategy; class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; -class GCStrategy; class MachineBasicBlock; class MachineConstantPoolValue; class MachineFunction; @@ -58,6 +58,7 @@ class MachineModuleInfo; class MachineOptimizationRemarkEmitter; class MCAsmInfo; class MCCFIInstruction; +struct MCCodePaddingContext; class MCContext; class MCExpr; class MCInst; @@ -76,11 +77,9 @@ class TargetMachine; class AsmPrinter : public MachineFunctionPass { public: /// Target machine description. - /// TargetMachine &TM; /// Target Asm Printer information. - /// const MCAsmInfo *MAI; /// This is the context for the output file that we are streaming. This owns @@ -103,7 +102,6 @@ public: /// The symbol for the current function. This is recalculated at the beginning /// of each call to runOnMachineFunction(). - /// MCSymbol *CurrentFnSym = nullptr; /// The symbol used to represent the start of the current function for the @@ -116,7 +114,7 @@ public: using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>; MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs; - /// Enable print [latency:throughput] in output + /// Enable print [latency:throughput] in output. bool EnablePrintSchedInfo = false; private: @@ -128,8 +126,8 @@ private: void *GCMetadataPrinters = nullptr; // Really a DenseMap. /// Emit comments in assembly output if this is true. - /// bool VerboseAsm; + static char ID; /// If VerboseAsm is set, a pointer to the loop info for this function. @@ -149,6 +147,7 @@ private: TimerDescription(TimerDescription), TimerGroupName(TimerGroupName), TimerGroupDescription(TimerGroupDescription) {} }; + /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. SmallVector<HandlerInfo, 1> Handlers; @@ -187,11 +186,9 @@ public: bool isPositionIndependent() const; /// Return true if assembly output should contain comments. - /// bool isVerbose() const { return VerboseAsm; } /// Return a unique ID for the current function. - /// unsigned getFunctionNumber() const; MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } @@ -235,13 +232,15 @@ public: // The table will contain these structs that point to the sled, the function // containing the sled, and what kind of sled (and whether they should always - // be instrumented). + // be instrumented). We also use a version identifier that the runtime can use + // to decide what to do with the sled, depending on the version of the sled. struct XRayFunctionEntry { const MCSymbol *Sled; const MCSymbol *Function; SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + uint8_t Version; void emit(int, MCStreamer *, const MCSymbol *) const; }; @@ -249,8 +248,12 @@ public: // All the sleds to be emitted. SmallVector<XRayFunctionEntry, 4> Sleds; + // A unique ID used for ELF sections associated with a particular function. + unsigned XRayFnUniqueID = 0; + // Helper function to record a given XRay sled. - void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, + uint8_t Version = 0); /// Emit a table with all XRay instrumentation points. 
void emitXRayTable(); @@ -260,7 +263,6 @@ public: //===------------------------------------------------------------------===// /// Record analysis usage. - /// void getAnalysisUsage(AnalysisUsage &AU) const override; /// Set up the AsmPrinter when we are working on a new module. If your pass @@ -293,8 +295,10 @@ public: void emitFrameAlloc(const MachineInstr &MI); + void emitStackSizeSection(const MachineFunction &MF); + enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug }; - CFIMoveType needsCFIMoves(); + CFIMoveType needsCFIMoves() const; /// Returns false if needsCFIMoves() == CFI_M_EH for any function /// in the module. @@ -305,12 +309,10 @@ public: /// Print to the current output stream assembly representations of the /// constants in the constant pool MCP. This is used to print out constants /// which have been "spilled to memory" by the code generator. - /// virtual void EmitConstantPool(); /// Print assembly representations of the jump tables used by the current /// function to the current output stream. - /// virtual void EmitJumpTableInfo(); /// Emit the specified global variable to the .s file. @@ -325,7 +327,6 @@ public: /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - /// void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. @@ -379,7 +380,7 @@ public: virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; /// Targets can override this to emit stuff at the end of a basic block. - virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} + virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); /// Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { @@ -443,15 +444,12 @@ public: void printOffset(int64_t Offset, raw_ostream &OS) const; /// Emit a byte directive and value. - /// void EmitInt8(int Value) const; /// Emit a short directive and value. - /// void EmitInt16(int Value) const; /// Emit a long directive and value. - /// void EmitInt32(int Value) const; /// Emit something like ".long Hi-Lo" where the size in bytes of the directive @@ -481,8 +479,12 @@ public: void EmitSLEB128(int64_t Value, const char *Desc = nullptr) const; /// Emit the specified unsigned leb128 value. - void EmitULEB128(uint64_t Value, const char *Desc = nullptr, - unsigned PadTo = 0) const; + void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const; + + /// Emit the specified unsigned leb128 value padded to a specific number + /// bytes + void EmitPaddedULEB128(uint64_t Value, unsigned PadTo, + const char *Desc = nullptr) const; /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. @@ -622,10 +624,13 @@ private: void EmitModuleIdents(Module &M); void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); + GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); /// Emit GlobalAlias or GlobalIFunc. 
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol& GIS); + void setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 6331070247928..bb5e7f9e8e30f 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -6,25 +6,63 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides a helper that implements much of the TTI interface in /// terms of the target-independent code generator and TargetLowering /// interfaces. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_BASICTTIIMPL_H #define LLVM_CODEGEN_BASICTTIIMPL_H +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> +#include <utility> namespace llvm { +class Function; +class GlobalValue; +class LLVMContext; +class ScalarEvolution; +class SCEV; +class TargetMachine; + extern cl::opt<unsigned> PartialUnrollingThreshold; /// \brief Base class which can be used to help build a TTI implementation. @@ -39,8 +77,8 @@ extern cl::opt<unsigned> PartialUnrollingThreshold; template <typename T> class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { private: - typedef TargetTransformInfoImplCRTPBase<T> BaseT; - typedef TargetTransformInfo TTI; + using BaseT = TargetTransformInfoImplCRTPBase<T>; + using TTI = TargetTransformInfo; /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. 
@@ -110,13 +148,13 @@ public: bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) { + unsigned AddrSpace, Instruction *I = nullptr) { TargetLoweringBase::AddrMode AM; AM.BaseGV = BaseGV; AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace); + return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { @@ -133,10 +171,6 @@ public: return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); } - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { - return getTLI()->isFoldableMemAccessOffset(I, Offset); - } - bool isTruncateFree(Type *Ty1, Type *Ty2) { return getTLI()->isTruncateFree(Ty1, Ty2); } @@ -235,7 +269,8 @@ public: if (N < 2 || N < TLI->getMinimumJumpTableEntries()) return N; uint64_t Range = - (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1; + (MaxCaseVal - MinCaseVal) + .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; // Check whether a range of clusters is dense enough for a jump table if (TLI->isSuitableForJumpTable(&SI, N, Range)) { JumpTableSize = Range; @@ -262,6 +297,10 @@ public: TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { + return true; + } + unsigned getFPOpCost(Type *Ty) { // By default, FP instructions are no more expensive since they are // implemented in HW. Target specific TTI can override this. @@ -272,17 +311,15 @@ public: const TargetLoweringBase *TLI = getTLI(); switch (Opcode) { default: break; - case Instruction::Trunc: { + case Instruction::Trunc: if (TLI->isTruncateFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; - } - case Instruction::ZExt: { + case Instruction::ZExt: if (TLI->isZExtFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; } - } return BaseT::getOperationCost(Opcode, Ty, OpTy); } @@ -354,6 +391,13 @@ public: UP.BEInsns = 2; } + int getInstructionLatency(const Instruction *I) { + if (isa<LoadInst>(I)) + return getST()->getSchedModel().DefaultLoadLatency; + + return BaseT::getInstructionLatency(I); + } + /// @} /// \name Vector TTI Implementations @@ -394,8 +438,8 @@ public: if (A->getType()->isVectorTy()) { VecTy = A->getType(); // If A is a vector operand, VF should be 1 or correspond to A. - assert ((VF == 1 || VF == VecTy->getVectorNumElements()) && - "Vector argument does not match VF"); + assert((VF == 1 || VF == VecTy->getVectorNumElements()) && + "Vector argument does not match VF"); } else VecTy = VectorType::get(A->getType(), VF); @@ -408,8 +452,8 @@ public: } unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { - assert (VecTy->isVectorTy()); - + assert(VecTy->isVectorTy()); + unsigned Cost = 0; Cost += getScalarizationOverhead(VecTy, true, false); @@ -531,7 +575,6 @@ public: // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { - // Scalar bitcasts are usually free. if (Opcode == Instruction::BitCast) return 0; @@ -547,7 +590,6 @@ public: // Check vector-to-vector casts. if (Dst->isVectorTy() && Src->isVectorTy()) { - // If the cast is between same-sized registers, then the check is simple. 
if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { @@ -743,7 +785,6 @@ public: // We only scale the cost of loads since interleaved store groups aren't // allowed to have gaps. if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { - // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); @@ -821,7 +862,7 @@ public: ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF = 1) { unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); - assert ((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); + assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); switch (IID) { default: { @@ -829,7 +870,7 @@ public: SmallVector<Type *, 4> Types; for (Value *Op : Args) { Type *OpTy = Op->getType(); - assert (VF == 1 || !OpTy->isVectorTy()); + assert(VF == 1 || !OpTy->isVectorTy()); Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); } @@ -839,7 +880,7 @@ public: // Compute the scalarization overhead based on Args for a vector // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while // CostModel will pass a vector RetTy and VF is 1. - unsigned ScalarizationCost = UINT_MAX; + unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); if (RetVF > 1 || VF > 1) { ScalarizationCost = 0; if (!RetTy->isVoidTy()) @@ -851,7 +892,7 @@ public: getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost); } case Intrinsic::masked_scatter: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[3]; bool VarMask = !isa<Constant>(Mask); unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); @@ -862,7 +903,7 @@ public: Alignment); } case Intrinsic::masked_gather: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[2]; bool VarMask = !isa<Constant>(Mask); unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); @@ -873,13 +914,14 @@ public: } } } - + /// Get intrinsic cost based on argument types. - /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the - /// arguments and the return value will be computed based on types. - unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX) { + /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the + /// cost of scalarizing the arguments and the return value will be computed + /// based on types. + unsigned getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { SmallVector<unsigned, 2> ISDs; unsigned SingleCallCost = 10; // Library call cost. Make it expensive. 
switch (IID) { @@ -889,7 +931,7 @@ public: unsigned ScalarCalls = 1; Type *ScalarRetTy = RetTy; if (RetTy->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); ScalarRetTy = RetTy->getScalarType(); @@ -898,7 +940,7 @@ public: for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { Type *Ty = Tys[i]; if (Ty->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost += getScalarizationOverhead(Ty, false, true); ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); Ty = Ty->getScalarType(); @@ -985,6 +1027,7 @@ public: // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::sideeffect: return 0; case Intrinsic::masked_store: return static_cast<T *>(this) @@ -1047,8 +1090,10 @@ public: // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. if (RetTy->isVectorTy()) { - unsigned ScalarizationCost = ((ScalarizationCostPassed != UINT_MAX) ? - ScalarizationCostPassed : getScalarizationOverhead(RetTy, true, false)); + unsigned ScalarizationCost = + ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) + ? ScalarizationCostPassed + : getScalarizationOverhead(RetTy, true, false)); unsigned ScalarCalls = RetTy->getVectorNumElements(); SmallVector<Type *, 4> ScalarTys; for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { @@ -1061,7 +1106,7 @@ public: IID, RetTy->getScalarType(), ScalarTys, FMF); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); } @@ -1096,7 +1141,7 @@ public: unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *) { - return 0; + return 0; } /// Try to calculate arithmetic and shuffle op costs for reduction operations. @@ -1134,7 +1179,8 @@ public: /// /// The cost model should take into account that the actual length of the /// vector is reduced on each iteration. - unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { + unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); @@ -1159,7 +1205,7 @@ public: } // The minimal length of the vector is limited by the real length of vector // operations performed on the current platform. That's why several final - // reduction opertions are perfomed on the vectors with the same + // reduction operations are performed on the vectors with the same // architecture-dependent length. ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, @@ -1169,6 +1215,66 @@ public: return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } + /// Try to calculate op costs for min/max reduction operations. + /// \param CondTy Conditional type for the Select instruction. 
+ unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, + bool) { + assert(Ty->isVectorTy() && "Expect a vector type"); + Type *ScalarTy = Ty->getVectorElementType(); + Type *ScalarCondTy = CondTy->getVectorElementType(); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned CmpOpcode; + if (Ty->isFPOrFPVectorTy()) { + CmpOpcode = Instruction::FCmp; + } else { + assert(Ty->isIntOrIntVectorTy() && + "expecting floating point or integer type for min/max reduction"); + CmpOpcode = Instruction::ICmp; + } + unsigned MinMaxCost = 0; + unsigned ShuffleCost = 0; + auto *ConcreteTTI = static_cast<T *>(this); + std::pair<unsigned, MVT> LT = + ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); + unsigned LongVectorCount = 0; + unsigned MVTLen = + LT.second.isVector() ? LT.second.getVectorNumElements() : 1; + while (NumVecElts > MVTLen) { + NumVecElts /= 2; + // Assume the pairwise shuffles add a cost. + ShuffleCost += (IsPairwise + 1) * + ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, + NumVecElts, Ty); + MinMaxCost += + ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, + nullptr); + Ty = VectorType::get(ScalarTy, NumVecElts); + CondTy = VectorType::get(ScalarCondTy, NumVecElts); + ++LongVectorCount; + } + // The minimal length of the vector is limited by the real length of vector + // operations performed on the current platform. That's why several final + // reduction opertions are perfomed on the vectors with the same + // architecture-dependent length. + ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * + ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, + NumVecElts, Ty); + MinMaxCost += + (NumReduxLevels - LongVectorCount) * + (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, + nullptr)); + // Need 3 extractelement instructions for scalarization + an additional + // scalar select instruction. + return ShuffleCost + MinMaxCost + + 3 * getScalarizationOverhead(Ty, /*Insert=*/false, + /*Extract=*/true) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, + ScalarCondTy, nullptr); + } + unsigned getVectorSplitCost() { return 1; } /// @} @@ -1177,7 +1283,8 @@ public: /// \brief Concrete BasicTTIImpl that can be used if no further customization /// is needed. 
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { - typedef BasicTTIImplBase<BasicTTIImpl> BaseT; + using BaseT = BasicTTIImplBase<BasicTTIImpl>; + friend class BasicTTIImplBase<BasicTTIImpl>; const TargetSubtargetInfo *ST; @@ -1190,6 +1297,6 @@ public: explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_BASICTTIIMPL_H diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 17c9415a81cbd..d9e8206408a78 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -1,4 +1,4 @@ -//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===// +//===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H @@ -16,11 +15,12 @@ namespace llvm { - class LiveInterval; - class LiveIntervals; - class MachineBlockFrequencyInfo; - class MachineLoopInfo; - class VirtRegMap; +class LiveInterval; +class LiveIntervals; +class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineLoopInfo; +class VirtRegMap; /// \brief Normalize the spill weight of a live interval /// @@ -32,7 +32,6 @@ namespace llvm { /// per function call. Derived from block frequencies. /// @param Size Size of live interval as returnexd by getSize() /// @param NumInstr Number of instructions using this live interval - /// static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr) { // The constant 25 instructions is added to avoid depending too much on @@ -47,7 +46,7 @@ namespace llvm { /// spill weight and allocation hint. class VirtRegAuxInfo { public: - typedef float (*NormalizingFn)(float, unsigned, unsigned); + using NormalizingFn = float (*)(float, unsigned, unsigned); private: MachineFunction &MF; @@ -67,6 +66,32 @@ namespace llvm { /// \brief (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); + + /// \brief Compute future expected spill weight of a split artifact of li + /// that will span between start and end slot indexes. + /// \param li The live interval to be split. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. + /// \return The expected spill weight of the split artifact. Returns + /// negative weight for unspillable li. + float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + + /// \brief Helper function for weight calculations. + /// (Re)compute li's spill weight and allocation hint, or, for non null + /// start and end - compute future expected spill weight of a split + /// artifact of li that will span between start and end slot indexes. + /// \param li The live interval for which to compute the weight. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \return The spill weight. 
Returns negative weight for unspillable li. + float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, + SlotIndex *end = nullptr); }; /// \brief Compute spill weights and allocation hints for all virtual register @@ -77,6 +102,7 @@ namespace llvm { const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm = normalizeSpillWeight); -} + +} // end namespace llvm #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 50e464ebb9b80..d30a27328c012 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -1,4 +1,4 @@ -//===-- llvm/CallingConvLower.h - Calling Conventions -----------*- C++ -*-===// +//===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetCallingConv.h" namespace llvm { + class CCState; class MVT; class TargetMachine; @@ -200,6 +201,7 @@ private: unsigned MaxStackArgAlign; SmallVector<uint32_t, 16> UsedRegs; SmallVector<CCValAssign, 4> PendingLocs; + SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags; // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs: // @@ -503,10 +505,15 @@ public: } // Get list of pending assignments - SmallVectorImpl<llvm::CCValAssign> &getPendingLocs() { + SmallVectorImpl<CCValAssign> &getPendingLocs() { return PendingLocs; } + // Get a list of argflags for pending assignments. + SmallVectorImpl<ISD::ArgFlagsTy> &getPendingArgFlags() { + return PendingArgFlags; + } + /// Compute the remaining unused register parameters that would be used for /// the given value type. This is useful when varargs are passed in the /// registers that normal prototyped parameters would be passed in, or for @@ -564,8 +571,6 @@ private: void MarkAllocated(unsigned Reg); }; - - } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_CALLINGCONVLOWER_H diff --git a/include/llvm/CodeGen/CommandFlags.def b/include/llvm/CodeGen/CommandFlags.def new file mode 100644 index 0000000000000..fe96033a9c617 --- /dev/null +++ b/include/llvm/CodeGen/CommandFlags.def @@ -0,0 +1,366 @@ +//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains codegen-specific flags that are shared between different +// command line tools. The tools "llc" and "opt" both use this file to prevent +// flag duplication. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.def" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <string> +using namespace llvm; + +static cl::opt<std::string> + MArch("march", + cl::desc("Architecture to generate code for (see --version)")); + +static cl::opt<std::string> + MCPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::init("")); + +static cl::list<std::string> + MAttrs("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); + +static cl::opt<Reloc::Model> RelocModel( + "relocation-model", cl::desc("Choose relocation model"), + cl::values( + clEnumValN(Reloc::Static, "static", "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValN(Reloc::ROPI, "ropi", + "Code and read-only data relocatable, accessed PC-relative"), + clEnumValN( + Reloc::RWPI, "rwpi", + "Read-write data relocatable, accessed relative to static base"), + clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", + "Combination of ropi and rwpi"))); + +LLVM_ATTRIBUTE_UNUSED static Optional<Reloc::Model> getRelocModel() { + if (RelocModel.getNumOccurrences()) { + Reloc::Model R = RelocModel; + return R; + } + return None; +} + +static cl::opt<ThreadModel::Model> TMModel( + "thread-model", cl::desc("Choose threading model"), + cl::init(ThreadModel::POSIX), + cl::values(clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"), + clEnumValN(ThreadModel::Single, "single", + "Single thread model"))); + +static cl::opt<llvm::CodeModel::Model> CMModel( + "code-model", cl::desc("Choose code model"), + cl::values(clEnumValN(CodeModel::Small, "small", "Small code model"), + clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"), + clEnumValN(CodeModel::Medium, "medium", "Medium code model"), + clEnumValN(CodeModel::Large, "large", "Large code model"))); + +LLVM_ATTRIBUTE_UNUSED static Optional<CodeModel::Model> getCodeModel() { + if (CMModel.getNumOccurrences()) { + CodeModel::Model M = CMModel; + return M; + } + return None; +} + +static cl::opt<llvm::ExceptionHandling> ExceptionModel( + "exception-model", cl::desc("exception model"), + cl::init(ExceptionHandling::None), + cl::values( + clEnumValN(ExceptionHandling::None, "default", + "default exception handling model"), + clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", + "DWARF-like CFI based exception handling"), + clEnumValN(ExceptionHandling::SjLj, "sjlj", "SjLj exception handling"), + clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), + clEnumValN(ExceptionHandling::WinEH, "wineh", + "Windows exception model"))); + +static cl::opt<TargetMachine::CodeGenFileType> FileType( + "filetype", cl::init(TargetMachine::CGFT_AssemblyFile), + cl::desc( + "Choose a file type (not all types are supported by all targets):"), + cl::values(clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + 
clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", + "Emit a native object ('.o') file"), + clEnumValN(TargetMachine::CGFT_Null, "null", + "Emit nothing, for performance testing"))); + +static cl::opt<bool> + DisableFPElim("disable-fp-elim", + cl::desc("Disable frame pointer elimination optimization"), + cl::init(false)); + +static cl::opt<bool> EnableUnsafeFPMath( + "enable-unsafe-fp-math", + cl::desc("Enable optimizations that may decrease FP precision"), + cl::init(false)); + +static cl::opt<bool> EnableNoInfsFPMath( + "enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::init(false)); + +static cl::opt<bool> EnableNoNaNsFPMath( + "enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::init(false)); + +static cl::opt<bool> EnableNoSignedZerosFPMath( + "enable-no-signed-zeros-fp-math", + cl::desc("Enable FP math optimizations that assume " + "the sign of 0 is insignificant"), + cl::init(false)); + +static cl::opt<bool> + EnableNoTrappingFPMath("enable-no-trapping-fp-math", + cl::desc("Enable setting the FP exceptions build " + "attribute not to use exceptions"), + cl::init(false)); + +static cl::opt<llvm::FPDenormal::DenormalMode> DenormalMode( + "denormal-fp-math", + cl::desc("Select which denormal numbers the code is permitted to require"), + cl::init(FPDenormal::IEEE), + cl::values(clEnumValN(FPDenormal::IEEE, "ieee", + "IEEE 754 denormal numbers"), + clEnumValN(FPDenormal::PreserveSign, "preserve-sign", + "the sign of a flushed-to-zero number is preserved " + "in the sign of 0"), + clEnumValN(FPDenormal::PositiveZero, "positive-zero", + "denormals are flushed to positive zero"))); + +static cl::opt<bool> EnableHonorSignDependentRoundingFPMath( + "enable-sign-dependent-rounding-fp-math", cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::init(false)); + +static cl::opt<llvm::FloatABI::ABIType> FloatABIForCalls( + "float-abi", cl::desc("Choose float ABI type"), cl::init(FloatABI::Default), + cl::values(clEnumValN(FloatABI::Default, "default", + "Target default float ABI type"), + clEnumValN(FloatABI::Soft, "soft", + "Soft float ABI (implied by -soft-float)"), + clEnumValN(FloatABI::Hard, "hard", + "Hard float ABI (uses FP registers)"))); + +static cl::opt<llvm::FPOpFusion::FPOpFusionMode> FuseFPOps( + "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"), + cl::init(FPOpFusion::Standard), + cl::values( + clEnumValN(FPOpFusion::Fast, "fast", "Fuse FP ops whenever profitable"), + clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."), + clEnumValN(FPOpFusion::Strict, "off", + "Only fuse FP ops when the result won't be affected."))); + +static cl::opt<bool> DontPlaceZerosInBSS( + "nozero-initialized-in-bss", + cl::desc("Don't place zero-initialized symbols into bss section"), + cl::init(false)); + +static cl::opt<bool> EnableGuaranteedTailCallOpt( + "tailcallopt", + cl::desc( + "Turn fastcc calls into tail calls by (potentially) changing ABI."), + cl::init(false)); + +static cl::opt<bool> DisableTailCalls("disable-tail-calls", + cl::desc("Never emit tail calls"), + cl::init(false)); + +static cl::opt<bool> StackSymbolOrdering("stack-symbol-ordering", + cl::desc("Order local stack symbols."), + cl::init(true)); + +static cl::opt<unsigned> + OverrideStackAlignment("stack-alignment", + cl::desc("Override default stack alignment"), + cl::init(0)); + +static cl::opt<bool> + StackRealign("stackrealign", + cl::desc("Force align the stack to 
the minimum alignment"), + cl::init(false)); + +static cl::opt<std::string> TrapFuncName( + "trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); + +static cl::opt<bool> UseCtors("use-ctors", + cl::desc("Use .ctors instead of .init_array."), + cl::init(false)); + +static cl::opt<bool> RelaxELFRelocations( + "relax-elf-relocations", + cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), + cl::init(false)); + +static cl::opt<bool> DataSections("data-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); + +static cl::opt<bool> + FunctionSections("function-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); + +static cl::opt<bool> EmulatedTLS("emulated-tls", + cl::desc("Use emulated TLS model"), + cl::init(false)); + +static cl::opt<bool> + UniqueSectionNames("unique-section-names", + cl::desc("Give unique names to every section"), + cl::init(true)); + +static cl::opt<llvm::EABI> + EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"), + cl::init(EABI::Default), + cl::values(clEnumValN(EABI::Default, "default", + "Triple default EABI version"), + clEnumValN(EABI::EABI4, "4", "EABI version 4"), + clEnumValN(EABI::EABI5, "5", "EABI version 5"), + clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); + +static cl::opt<DebuggerKind> DebuggerTuningOpt( + "debugger-tune", cl::desc("Tune debug info for a particular debugger"), + cl::init(DebuggerKind::Default), + cl::values(clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); + +static cl::opt<bool> EnableStackSizeSection( + "stack-size-section", + cl::desc("Emit a section containing stack size metadata"), cl::init(false)); + +// Common utility function tightly tied to the options listed here. Initializes +// a TargetOptions object with CodeGen flags and returns it. +static TargetOptions InitTargetOptionsFromCodeGenFlags() { + TargetOptions Options; + Options.AllowFPOpFusion = FuseFPOps; + Options.UnsafeFPMath = EnableUnsafeFPMath; + Options.NoInfsFPMath = EnableNoInfsFPMath; + Options.NoNaNsFPMath = EnableNoNaNsFPMath; + Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath; + Options.NoTrappingFPMath = EnableNoTrappingFPMath; + Options.FPDenormalMode = DenormalMode; + Options.HonorSignDependentRoundingFPMathOption = + EnableHonorSignDependentRoundingFPMath; + if (FloatABIForCalls != FloatABI::Default) + Options.FloatABIType = FloatABIForCalls; + Options.NoZerosInBSS = DontPlaceZerosInBSS; + Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; + Options.StackAlignmentOverride = OverrideStackAlignment; + Options.StackSymbolOrdering = StackSymbolOrdering; + Options.UseInitArray = !UseCtors; + Options.RelaxELFRelocations = RelaxELFRelocations; + Options.DataSections = DataSections; + Options.FunctionSections = FunctionSections; + Options.UniqueSectionNames = UniqueSectionNames; + Options.EmulatedTLS = EmulatedTLS; + Options.ExceptionModel = ExceptionModel; + Options.EmitStackSizeSection = EnableStackSizeSection; + + Options.MCOptions = InitMCTargetOptionsFromFlags(); + + Options.ThreadModel = TMModel; + Options.EABIVersion = EABIVersion; + Options.DebuggerTuning = DebuggerTuningOpt; + + return Options; +} + +LLVM_ATTRIBUTE_UNUSED static std::string getCPUStr() { + // If user asked for the 'native' CPU, autodetect here. 
If autodection fails, + // this will set the CPU to an empty string which tells the target to + // pick a basic default. + if (MCPU == "native") + return sys::getHostCPUName(); + + return MCPU; +} + +LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() { + SubtargetFeatures Features; + + // If user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX. + if (MCPU == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + + return Features.getString(); +} + +/// \brief Set function attributes of functions in Module M based on CPU, +/// Features, and command line flags. +LLVM_ATTRIBUTE_UNUSED static void +setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { + for (auto &F : M) { + auto &Ctx = F.getContext(); + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; + + if (!CPU.empty()) + NewAttrs.addAttribute("target-cpu", CPU); + if (!Features.empty()) + NewAttrs.addAttribute("target-features", Features); + if (DisableFPElim.getNumOccurrences() > 0) + NewAttrs.addAttribute("no-frame-pointer-elim", + DisableFPElim ? "true" : "false"); + if (DisableTailCalls.getNumOccurrences() > 0) + NewAttrs.addAttribute("disable-tail-calls", + toStringRef(DisableTailCalls)); + if (StackRealign) + NewAttrs.addAttribute("stackrealign"); + + if (TrapFuncName.getNumOccurrences() > 0) + for (auto &B : F) + for (auto &I : B) + if (auto *Call = dyn_cast<CallInst>(&I)) + if (const auto *F = Call->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::debugtrap || + F->getIntrinsicID() == Intrinsic::trap) + Call->addAttribute( + llvm::AttributeList::FunctionIndex, + Attribute::get(Ctx, "trap-func-name", TrapFuncName)); + + // Let NewAttrs override Attrs. + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); + } +} diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h deleted file mode 100644 index 0d898827efc61..0000000000000 --- a/include/llvm/CodeGen/CommandFlags.h +++ /dev/null @@ -1,382 +0,0 @@ -//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains codegen-specific flags that are shared between different -// command line tools. The tools "llc" and "opt" both use this file to prevent -// flag duplication. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_COMMANDFLAGS_H -#define LLVM_CODEGEN_COMMANDFLAGS_H - -#include "llvm/ADT/StringExtras.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include <string> -using namespace llvm; - -cl::opt<std::string> -MArch("march", cl::desc("Architecture to generate code for (see --version)")); - -cl::opt<std::string> -MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), - cl::init("")); - -cl::list<std::string> -MAttrs("mattr", - cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - -cl::opt<Reloc::Model> RelocModel( - "relocation-model", cl::desc("Choose relocation model"), - cl::values( - clEnumValN(Reloc::Static, "static", "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValN(Reloc::ROPI, "ropi", - "Code and read-only data relocatable, accessed PC-relative"), - clEnumValN(Reloc::RWPI, "rwpi", - "Read-write data relocatable, accessed relative to static base"), - clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", - "Combination of ropi and rwpi"))); - -static inline Optional<Reloc::Model> getRelocModel() { - if (RelocModel.getNumOccurrences()) { - Reloc::Model R = RelocModel; - return R; - } - return None; -} - -cl::opt<ThreadModel::Model> -TMModel("thread-model", - cl::desc("Choose threading model"), - cl::init(ThreadModel::POSIX), - cl::values(clEnumValN(ThreadModel::POSIX, "posix", - "POSIX thread model"), - clEnumValN(ThreadModel::Single, "single", - "Single thread model"))); - -cl::opt<llvm::CodeModel::Model> -CMModel("code-model", - cl::desc("Choose code model"), - cl::init(CodeModel::Default), - cl::values(clEnumValN(CodeModel::Default, "default", - "Target default code model"), - clEnumValN(CodeModel::Small, "small", - "Small code model"), - clEnumValN(CodeModel::Kernel, "kernel", - "Kernel code model"), - clEnumValN(CodeModel::Medium, "medium", - "Medium code model"), - clEnumValN(CodeModel::Large, "large", - "Large code model"))); - -cl::opt<llvm::ExceptionHandling> -ExceptionModel("exception-model", - cl::desc("exception model"), - cl::init(ExceptionHandling::None), - cl::values(clEnumValN(ExceptionHandling::None, "default", - "default exception handling model"), - clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", - "DWARF-like CFI based exception handling"), - clEnumValN(ExceptionHandling::SjLj, "sjlj", - "SjLj exception handling"), - clEnumValN(ExceptionHandling::ARM, "arm", - "ARM EHABI exceptions"), - clEnumValN(ExceptionHandling::WinEH, "wineh", - "Windows exception model"))); - -cl::opt<TargetMachine::CodeGenFileType> -FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile), - cl::desc("Choose a file type (not all types are supported by all targets):"), - cl::values( - clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", - "Emit an assembly ('.s') file"), - clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", - "Emit a native object ('.o') file"), - 
clEnumValN(TargetMachine::CGFT_Null, "null", - "Emit nothing, for performance testing"))); - -cl::opt<bool> -DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::init(false)); - -cl::opt<bool> -EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::init(false)); - -cl::opt<bool> -EnableNoInfsFPMath("enable-no-infs-fp-math", - cl::desc("Enable FP math optimizations that assume no +-Infs"), - cl::init(false)); - -cl::opt<bool> -EnableNoNaNsFPMath("enable-no-nans-fp-math", - cl::desc("Enable FP math optimizations that assume no NaNs"), - cl::init(false)); - -cl::opt<bool> -EnableNoSignedZerosFPMath("enable-no-signed-zeros-fp-math", - cl::desc("Enable FP math optimizations that assume " - "the sign of 0 is insignificant"), - cl::init(false)); - -cl::opt<bool> -EnableNoTrappingFPMath("enable-no-trapping-fp-math", - cl::desc("Enable setting the FP exceptions build " - "attribute not to use exceptions"), - cl::init(false)); - -cl::opt<llvm::FPDenormal::DenormalMode> -DenormalMode("denormal-fp-math", - cl::desc("Select which denormal numbers the code is permitted to require"), - cl::init(FPDenormal::IEEE), - cl::values( - clEnumValN(FPDenormal::IEEE, "ieee", - "IEEE 754 denormal numbers"), - clEnumValN(FPDenormal::PreserveSign, "preserve-sign", - "the sign of a flushed-to-zero number is preserved " - "in the sign of 0"), - clEnumValN(FPDenormal::PositiveZero, "positive-zero", - "denormals are flushed to positive zero"))); - -cl::opt<bool> -EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", - cl::Hidden, - cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::init(false)); - -cl::opt<llvm::FloatABI::ABIType> -FloatABIForCalls("float-abi", - cl::desc("Choose float ABI type"), - cl::init(FloatABI::Default), - cl::values( - clEnumValN(FloatABI::Default, "default", - "Target default float ABI type"), - clEnumValN(FloatABI::Soft, "soft", - "Soft float ABI (implied by -soft-float)"), - clEnumValN(FloatABI::Hard, "hard", - "Hard float ABI (uses FP registers)"))); - -cl::opt<llvm::FPOpFusion::FPOpFusionMode> -FuseFPOps("fp-contract", - cl::desc("Enable aggressive formation of fused FP ops"), - cl::init(FPOpFusion::Standard), - cl::values( - clEnumValN(FPOpFusion::Fast, "fast", - "Fuse FP ops whenever profitable"), - clEnumValN(FPOpFusion::Standard, "on", - "Only fuse 'blessed' FP ops."), - clEnumValN(FPOpFusion::Strict, "off", - "Only fuse FP ops when the result won't be affected."))); - -cl::opt<bool> -DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::init(false)); - -cl::opt<bool> -EnableGuaranteedTailCallOpt("tailcallopt", - cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), - cl::init(false)); - -cl::opt<bool> -DisableTailCalls("disable-tail-calls", - cl::desc("Never emit tail calls"), - cl::init(false)); - -cl::opt<bool> -StackSymbolOrdering("stack-symbol-ordering", - cl::desc("Order local stack symbols."), - cl::init(true)); - -cl::opt<unsigned> -OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::init(0)); - -cl::opt<bool> -StackRealign("stackrealign", - cl::desc("Force align the stack to the minimum alignment"), - cl::init(false)); - -cl::opt<std::string> -TrapFuncName("trap-func", cl::Hidden, - cl::desc("Emit a call to trap function rather than a trap instruction"), - cl::init("")); - 
-cl::opt<bool> -UseCtors("use-ctors", - cl::desc("Use .ctors instead of .init_array."), - cl::init(false)); - -cl::opt<bool> RelaxELFRelocations( - "relax-elf-relocations", - cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), - cl::init(false)); - -cl::opt<bool> DataSections("data-sections", - cl::desc("Emit data into separate sections"), - cl::init(false)); - -cl::opt<bool> -FunctionSections("function-sections", - cl::desc("Emit functions into separate sections"), - cl::init(false)); - -cl::opt<bool> EmulatedTLS("emulated-tls", - cl::desc("Use emulated TLS model"), - cl::init(false)); - -cl::opt<bool> UniqueSectionNames("unique-section-names", - cl::desc("Give unique names to every section"), - cl::init(true)); - -cl::opt<llvm::EABI> EABIVersion( - "meabi", cl::desc("Set EABI type (default depends on triple):"), - cl::init(EABI::Default), - cl::values(clEnumValN(EABI::Default, "default", - "Triple default EABI version"), - clEnumValN(EABI::EABI4, "4", "EABI version 4"), - clEnumValN(EABI::EABI5, "5", "EABI version 5"), - clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); - -cl::opt<DebuggerKind> -DebuggerTuningOpt("debugger-tune", - cl::desc("Tune debug info for a particular debugger"), - cl::init(DebuggerKind::Default), - cl::values( - clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), - clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), - clEnumValN(DebuggerKind::SCE, "sce", - "SCE targets (e.g. PS4)"))); - -// Common utility function tightly tied to the options listed here. Initializes -// a TargetOptions object with CodeGen flags and returns it. -static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { - TargetOptions Options; - Options.AllowFPOpFusion = FuseFPOps; - Options.UnsafeFPMath = EnableUnsafeFPMath; - Options.NoInfsFPMath = EnableNoInfsFPMath; - Options.NoNaNsFPMath = EnableNoNaNsFPMath; - Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath; - Options.NoTrappingFPMath = EnableNoTrappingFPMath; - Options.FPDenormalMode = DenormalMode; - Options.HonorSignDependentRoundingFPMathOption = - EnableHonorSignDependentRoundingFPMath; - if (FloatABIForCalls != FloatABI::Default) - Options.FloatABIType = FloatABIForCalls; - Options.NoZerosInBSS = DontPlaceZerosInBSS; - Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; - Options.StackAlignmentOverride = OverrideStackAlignment; - Options.StackSymbolOrdering = StackSymbolOrdering; - Options.UseInitArray = !UseCtors; - Options.RelaxELFRelocations = RelaxELFRelocations; - Options.DataSections = DataSections; - Options.FunctionSections = FunctionSections; - Options.UniqueSectionNames = UniqueSectionNames; - Options.EmulatedTLS = EmulatedTLS; - Options.ExceptionModel = ExceptionModel; - - Options.MCOptions = InitMCTargetOptionsFromFlags(); - - Options.ThreadModel = TMModel; - Options.EABIVersion = EABIVersion; - Options.DebuggerTuning = DebuggerTuningOpt; - - return Options; -} - -static inline std::string getCPUStr() { - // If user asked for the 'native' CPU, autodetect here. If autodection fails, - // this will set the CPU to an empty string which tells the target to - // pick a basic default. - if (MCPU == "native") - return sys::getHostCPUName(); - - return MCPU; -} - -static inline std::string getFeaturesStr() { - SubtargetFeatures Features; - - // If user asked for the 'native' CPU, we need to autodetect features. - // This is necessary for x86 where the CPU might not support all the - // features the autodetected CPU name lists in the target. For example, - // not all Sandybridge processors support AVX. 
- if (MCPU == "native") { - StringMap<bool> HostFeatures; - if (sys::getHostCPUFeatures(HostFeatures)) - for (auto &F : HostFeatures) - Features.AddFeature(F.first(), F.second); - } - - for (unsigned i = 0; i != MAttrs.size(); ++i) - Features.AddFeature(MAttrs[i]); - - return Features.getString(); -} - -/// \brief Set function attributes of functions in Module M based on CPU, -/// Features, and command line flags. -static inline void setFunctionAttributes(StringRef CPU, StringRef Features, - Module &M) { - for (auto &F : M) { - auto &Ctx = F.getContext(); - AttributeList Attrs = F.getAttributes(); - AttrBuilder NewAttrs; - - if (!CPU.empty()) - NewAttrs.addAttribute("target-cpu", CPU); - if (!Features.empty()) - NewAttrs.addAttribute("target-features", Features); - if (DisableFPElim.getNumOccurrences() > 0) - NewAttrs.addAttribute("no-frame-pointer-elim", - DisableFPElim ? "true" : "false"); - if (DisableTailCalls.getNumOccurrences() > 0) - NewAttrs.addAttribute("disable-tail-calls", - toStringRef(DisableTailCalls)); - if (StackRealign) - NewAttrs.addAttribute("stackrealign"); - - if (TrapFuncName.getNumOccurrences() > 0) - for (auto &B : F) - for (auto &I : B) - if (auto *Call = dyn_cast<CallInst>(&I)) - if (const auto *F = Call->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::debugtrap || - F->getIntrinsicID() == Intrinsic::trap) - Call->addAttribute( - llvm::AttributeList::FunctionIndex, - Attribute::get(Ctx, "trap-func-name", TrapFuncName)); - - // Let NewAttrs override Attrs. - F.setAttributes( - Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); - } -} - -#endif diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h new file mode 100644 index 0000000000000..5a6368c5a0f8a --- /dev/null +++ b/include/llvm/CodeGen/CostTable.h @@ -0,0 +1,69 @@ +//===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Cost tables and simple lookup functions +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_COSTTABLE_H_ +#define LLVM_CODEGEN_COSTTABLE_H_ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineValueType.h" + +namespace llvm { + +/// Cost Table Entry +struct CostTblEntry { + int ISD; + MVT::SimpleValueType Type; + unsigned Cost; +}; + +/// Find in cost table, TypeTy must be comparable to CompareTy by == +inline const CostTblEntry *CostTableLookup(ArrayRef<CostTblEntry> Tbl, + int ISD, MVT Ty) { + auto I = find_if(Tbl, [=](const CostTblEntry &Entry) { + return ISD == Entry.ISD && Ty == Entry.Type; + }); + if (I != Tbl.end()) + return I; + + // Could not find an entry. 
+ return nullptr; +} + +/// Type Conversion Cost Table +struct TypeConversionCostTblEntry { + int ISD; + MVT::SimpleValueType Dst; + MVT::SimpleValueType Src; + unsigned Cost; +}; + +/// Find in type conversion cost table, TypeTy must be comparable to CompareTy +/// by == +inline const TypeConversionCostTblEntry * +ConvertCostTableLookup(ArrayRef<TypeConversionCostTblEntry> Tbl, + int ISD, MVT Dst, MVT Src) { + auto I = find_if(Tbl, [=](const TypeConversionCostTblEntry &Entry) { + return ISD == Entry.ISD && Src == Entry.Src && Dst == Entry.Dst; + }); + if (I != Tbl.end()) + return I; + + // Could not find an entry. + return nullptr; +} + +} // namespace llvm + +#endif /* LLVM_CODEGEN_COSTTABLE_H_ */ diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 77c37ac7abeae..d3aabe22f2165 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -208,6 +208,13 @@ public: // Add a DAG mutation to be done before the packetization begins. void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation); + + bool alias(const MachineInstr &MI1, const MachineInstr &MI2, + bool UseTBAA = true) const; + +private: + bool alias(const MachineMemOperand &Op1, const MachineMemOperand &Op2, + bool UseTBAA = true) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 74e4179e73e98..85bb826dcb8c4 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -27,7 +28,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Target/TargetLowering.h" #include <algorithm> #include <cstdint> #include <utility> diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h index 98ff526dfe946..55e25c9823b17 100644 --- a/include/llvm/CodeGen/FaultMaps.h +++ b/include/llvm/CodeGen/FaultMaps.h @@ -39,6 +39,9 @@ public: void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel); void serializeToFaultMapSection(); + void reset() { + FunctionInfos.clear(); + } private: static const char *WFMP; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index f32a58915118f..3b39d87ffb4a4 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -23,11 +23,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <utility> #include <vector> diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index e7ce1946889e3..ba84d76de1649 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -18,10 +18,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallSite.h" #include 
"llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetCallingConv.h" #include <cstdint> #include <functional> diff --git a/include/llvm/CodeGen/GlobalISel/GISelAccessor.h b/include/llvm/CodeGen/GlobalISel/GISelAccessor.h deleted file mode 100644 index 8dea38059ea47..0000000000000 --- a/include/llvm/CodeGen/GlobalISel/GISelAccessor.h +++ /dev/null @@ -1,39 +0,0 @@ -//===-- GISelAccessor.h - GISel Accessor ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// This file declares the API to access the various APIs related -/// to GlobalISel. -// -//===----------------------------------------------------------------------===/ - -#ifndef LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H -#define LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H - -namespace llvm { -class CallLowering; -class InstructionSelector; -class LegalizerInfo; -class RegisterBankInfo; - -/// The goal of this helper class is to gather the accessor to all -/// the APIs related to GlobalISel. -/// It should be derived to feature an actual accessor to the GISel APIs. -/// The reason why this is not simply done into the subtarget is to avoid -/// spreading ifdefs around. -struct GISelAccessor { - virtual ~GISelAccessor() {} - virtual const CallLowering *getCallLowering() const { return nullptr;} - virtual const InstructionSelector *getInstructionSelector() const { - return nullptr; - } - virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; } - virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr;} -}; -} // End namespace llvm; -#endif diff --git a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h new file mode 100644 index 0000000000000..167905dc9aa1a --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -0,0 +1,69 @@ +//===- GISelWorkList.h - Worklist for GISel passes ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_GISEL_WORKLIST_H +#define LLVM_GISEL_WORKLIST_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +class MachineInstr; + +// Worklist which mostly works similar to InstCombineWorkList, but on MachineInstrs. +// The main difference with something like a SetVector is that erasing an element doesn't +// move all elements over one place - instead just nulls out the element of the vector. +// FIXME: Does it make sense to factor out common code with the instcombinerWorkList? +template<unsigned N> +class GISelWorkList { + SmallVector<MachineInstr*, N> Worklist; + DenseMap<MachineInstr*, unsigned> WorklistMap; + +public: + GISelWorkList() = default; + + bool empty() const { return WorklistMap.empty(); } + + unsigned size() const { return WorklistMap.size(); } + + /// Add - Add the specified instruction to the worklist if it isn't already + /// in it. + void insert(MachineInstr *I) { + if (WorklistMap.try_emplace(I, Worklist.size()).second) { + Worklist.push_back(I); + } + } + + /// Remove - remove I from the worklist if it exists. 
+ void remove(MachineInstr *I) { + auto It = WorklistMap.find(I); + if (It == WorklistMap.end()) return; // Not in worklist. + + // Don't bother moving everything down, just null out the slot. + Worklist[It->second] = nullptr; + + WorklistMap.erase(It); + } + + MachineInstr *pop_back_val() { + MachineInstr *I; + do { + I = Worklist.pop_back_val(); + } while(!I); + assert(I && "Pop back on empty worklist"); + WorklistMap.erase(I); + return I; + } +}; + +} // end namespace llvm. + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 1060d8fd667e6..e599a1b179ece 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -16,7 +16,10 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CodeGenCoverage.h" #include <bitset> #include <cstddef> #include <cstdint> @@ -26,9 +29,12 @@ namespace llvm { +class APInt; +class APFloat; class LLT; class MachineInstr; class MachineInstrBuilder; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; @@ -63,6 +69,18 @@ public: }; enum { + /// Begin a try-block to attempt a match and jump to OnFail if it is + /// unsuccessful. + /// - OnFail - The MatchTable entry at which to resume if the match fails. + /// + /// FIXME: This ought to take an argument indicating the number of try-blocks + /// to exit on failure. It's usually one but the last match attempt of + /// a block will need more. The (implemented) alternative is to tack a + /// GIM_Reject on the end of each try-block which is simpler but + /// requires an extra opcode and iteration in the interpreter on each + /// failed match. + GIM_Try, + /// Record the specified instruction /// - NewInsnID - Instruction ID to define /// - InsnID - Instruction ID @@ -81,12 +99,35 @@ enum { /// - InsnID - Instruction ID /// - Expected number of operands GIM_CheckNumOperands, + /// Check an immediate predicate on the specified instruction + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckI64ImmPredicate, + /// Check an immediate predicate on the specified instruction via an APInt. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPIntImmPredicate, + /// Check a floating point immediate predicate on the specified instruction. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPFloatImmPredicate, + /// Check a memory operation has the specified atomic ordering. + /// - InsnID - Instruction ID + /// - Ordering - The AtomicOrdering value + GIM_CheckAtomicOrdering, + GIM_CheckAtomicOrderingOrStrongerThan, + GIM_CheckAtomicOrderingWeakerThan, /// Check the type for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index /// - Expected type GIM_CheckType, + /// Check the type of a pointer to any address space. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - SizeInBits - The size of the pointer value in bits. + GIM_CheckPointerToAny, /// Check the register bank for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -124,6 +165,17 @@ enum { /// - InsnID - Instruction ID GIM_CheckIsSafeToFold, + /// Check the specified operands are identical. 
+ /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - OtherInsnID - Other instruction ID + /// - OtherOpIdx - Other operand index + GIM_CheckIsSameOperand, + + /// Fail the current try-block, or completely fail to match if there is no + /// current try-block. + GIM_Reject, + //=== Renderers === /// Mutate an instruction @@ -141,6 +193,13 @@ enum { /// - OldInsnID - Instruction ID to copy from /// - OpIdx - The operand to copy GIR_Copy, + /// Copy an operand to the specified instruction or add a zero register if the + /// operand is a zero immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + /// - ZeroReg - The zero register to use + GIR_CopyOrAddZeroReg, /// Copy an operand to the specified instruction /// - NewInsnID - Instruction ID to modify /// - OldInsnID - Instruction ID to copy from @@ -159,6 +218,10 @@ enum { /// - InsnID - Instruction ID to modify /// - RegNum - The register to add GIR_AddRegister, + /// Add a a temporary register to the specified instruction + /// - InsnID - Instruction ID to modify + /// - TempRegID - The temporary register ID to add + GIR_AddTempRegister, /// Add an immediate to the specified instruction /// - InsnID - Instruction ID to modify /// - Imm - The immediate to add @@ -167,6 +230,17 @@ enum { /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call GIR_ComplexRenderer, + /// Render sub-operands of complex operands to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RendererID - The renderer to call + /// - RenderOpID - The suboperand to render. + GIR_ComplexSubOperandRenderer, + + /// Render a G_CONSTANT operator as a sign-extended immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// The operand index is implicitly 1. + GIR_CopyConstantAsSImm, /// Constrain an instruction operand to a register class. /// - InsnID - Instruction ID to modify @@ -179,18 +253,39 @@ enum { GIR_ConstrainSelectedInstOperands, /// Merge all memory operands into instruction. /// - InsnID - Instruction ID to modify + /// - MergeInsnID... - One or more Instruction ID to merge into the result. + /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to + /// merge. GIR_MergeMemOperands, /// Erase from parent. /// - InsnID - Instruction ID to erase GIR_EraseFromParent, + /// Create a new temporary register that's not constrained. + /// - TempRegID - The temporary register ID to initialize. + /// - Expected type + GIR_MakeTempReg, /// A successful emission GIR_Done, + + /// Increment the rule coverage counter. + /// - RuleID - The ID of the rule that was covered. + GIR_Coverage, +}; + +enum { + /// Indicates the end of the variable-length MergeInsnID list in a + /// GIR_MergeMemOperands opcode. + GIU_MergeMemOperands_EndOfList = -1, }; /// Provides the logic to select generic machine instructions. 
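These opcodes form a small bytecode that executeMatchTable() interprets; the tables are normally emitted by TableGen from the target's patterns. As a rough illustration of the encoding (the operand orders follow the documentation above, but the type index, the jump target, and the placeholder target opcode are invented for this sketch, and a generated table would carry additional checks such as operand counts, register banks, and feature bits):

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <cstdint>
using namespace llvm;

// Placeholder for a real target instruction opcode.
static const int64_t MYTGT_ADDrr = 0;

// One try-block: match a G_ADD whose operand 0 has TypeObjects[0]'s type,
// mutate it to MYTGT_ADDrr, and constrain its register operands. Index 16 is
// the GIM_Reject immediately after the block, so any failed check falls
// through to an overall rejection.
static const int64_t MatchTable0[] = {
    GIM_Try, /*OnFail*/ 16,
    /*2:*/  GIM_CheckOpcode, /*MI*/ 0, TargetOpcode::G_ADD,
    /*5:*/  GIM_CheckType, /*MI*/ 0, /*Op*/ 0, /*TypeID*/ 0,
    /*9:*/  GIR_MutateOpcode, /*OldMI*/ 0, /*NewMI*/ 0, MYTGT_ADDrr,
    /*13:*/ GIR_ConstrainSelectedInstOperands, /*MI*/ 0,
    /*15:*/ GIR_Done,
    /*16:*/ GIM_Reject,
};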
class InstructionSelector { public: + using I64ImmediatePredicateFn = bool (*)(int64_t); + using APIntImmediatePredicateFn = bool (*)(const APInt &); + using APFloatImmediatePredicateFn = bool (*)(const APFloat &); + virtual ~InstructionSelector() = default; /// Select the (possibly generic) instruction \p I to only use target-specific @@ -203,17 +298,18 @@ public: /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - /// - virtual bool select(MachineInstr &I) const = 0; + virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; protected: - using ComplexRendererFn = std::function<void(MachineInstrBuilder &)>; + using ComplexRendererFns = + Optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>; using RecordedMIVector = SmallVector<MachineInstr *, 4>; using NewMIVector = SmallVector<MachineInstrBuilder, 4>; struct MatcherState { - std::vector<ComplexRendererFn> Renderers; + std::vector<ComplexRendererFns::value_type> Renderers; RecordedMIVector MIs; + DenseMap<unsigned, unsigned> TempRegisters; MatcherState(unsigned MaxRenderers); }; @@ -223,7 +319,10 @@ public: struct MatcherInfoTy { const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; - const std::vector<ComplexMatcherMemFn> ComplexPredicates; + const I64ImmediatePredicateFn *I64ImmPredicateFns; + const APIntImmediatePredicateFn *APIntImmPredicateFns; + const APFloatImmediatePredicateFn *APFloatImmPredicateFns; + const ComplexMatcherMemFn *ComplexPredicates; }; protected: @@ -238,8 +337,8 @@ protected: const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, - const PredicateBitset &AvailableFeatures) const; + const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, + CodeGenCoverage &CoverageInfo) const; /// Constrain a register operand of an instruction \p I to a specified /// register class. This could involve inserting COPYs before (for uses) or @@ -268,7 +367,16 @@ protected: bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; - bool isObviouslySafeToFold(MachineInstr &MI) const; + /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the + /// right-hand side. GlobalISel's separation of pointer and integer types + /// means that we don't need to worry about G_OR with equivalent semantics. + bool isBaseWithConstantOffset(const MachineOperand &Root, + const MachineRegisterInfo &MRI) const; + + /// Return true if MI can obviously be folded into IntoMI. + /// MI and IntoMI do not need to be in the same basic blocks, but MI must + /// preceed IntoMI. 
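The ComplexRendererFns type above replaces the old single std::function renderer: a complex-pattern handler in a target's selector returns None when the pattern does not apply, or a small list of per-operand renderer lambdas when it does (GIR_ComplexSubOperandRenderer then picks individual entries out of that list). A minimal sketch under the assumption of a hypothetical target selector; FooInstructionSelector and selectAddrModeFoo are illustrative names, not in-tree API:

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

class FooInstructionSelector : public InstructionSelector {
  // Reject non-register roots; otherwise render the operand as a base
  // register plus a zero immediate offset.
  ComplexRendererFns selectAddrModeFoo(MachineOperand &Root) const {
    if (!Root.isReg())
      return None;
    unsigned Base = Root.getReg();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addUse(Base); }, // base register
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // offset
    }};
  }
};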
+ bool isObviouslySafeToFold(MachineInstr &MI, MachineInstr &IntoMI) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 98b6b859b9e26..ac2c055ab1452 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h ---------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,32 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> + namespace llvm { + +/// GlobalISel PatFrag Predicates +enum { + GIPFP_I64_Invalid = 0, + GIPFP_APInt_Invalid = 0, + GIPFP_APFloat_Invalid = 0, +}; + template <class TgtInstructionSelector, class PredicateBitset, class ComplexMatcherMemFn> bool InstructionSelector::executeMatchTable( @@ -24,306 +49,687 @@ bool InstructionSelector::executeMatchTable( const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, - const PredicateBitset &AvailableFeatures) const { - const int64_t *Command = MatchTable; + const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, + CodeGenCoverage &CoverageInfo) const { + uint64_t CurrentIdx = 0; + SmallVector<uint64_t, 8> OnFailResumeAt; + + enum RejectAction { RejectAndGiveUp, RejectAndResume }; + auto handleReject = [&]() -> RejectAction { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Rejected\n"); + if (OnFailResumeAt.empty()) + return RejectAndGiveUp; + CurrentIdx = OnFailResumeAt.back(); + OnFailResumeAt.pop_back(); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " (" + << OnFailResumeAt.size() << " try-blocks remain)\n"); + return RejectAndResume; + }; + while (true) { - switch (*Command++) { + assert(CurrentIdx != ~0u && "Invalid MatchTable index"); + switch (MatchTable[CurrentIdx++]) { + case GIM_Try: { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Begin try-block\n"); + OnFailResumeAt.push_back(MatchTable[CurrentIdx++]); + break; + } + case GIM_RecordInsn: { - int64_t NewInsnID = *Command++; - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; // As an optimisation we require that MIs[0] is always the root. Refuse // any attempt to modify it. 
assert(NewInsnID != 0 && "Refusing to modify MIs[0]"); - (void)NewInsnID; MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); if (!MO.isReg()) { - DEBUG(dbgs() << "Rejected (not a register)\n"); - return false; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Not a register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; } if (TRI.isPhysicalRegister(MO.getReg())) { - DEBUG(dbgs() << "Rejected (is a physical register)\n"); - return false; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Is a physical register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; } - assert((size_t)NewInsnID == State.MIs.size() && - "Expected to store MIs in order"); - State.MIs.push_back(MRI.getVRegDef(MO.getReg())); - DEBUG(dbgs() << "MIs[" << NewInsnID << "] = GIM_RecordInsn(" << InsnID - << ", " << OpIdx << ")\n"); + MachineInstr *NewMI = MRI.getVRegDef(MO.getReg()); + if ((size_t)NewInsnID < State.MIs.size()) + State.MIs[NewInsnID] = NewMI; + else { + assert((size_t)NewInsnID == State.MIs.size() && + "Expected to store MIs in order"); + State.MIs.push_back(NewMI); + } + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": MIs[" << NewInsnID + << "] = GIM_RecordInsn(" << InsnID << ", " << OpIdx + << ")\n"); break; } case GIM_CheckFeatures: { - int64_t ExpectedBitsetID = *Command++; - DEBUG(dbgs() << "GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID - << ")\n"); + int64_t ExpectedBitsetID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckFeatures(ExpectedBitsetID=" + << ExpectedBitsetID << ")\n"); if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { - DEBUG(dbgs() << "Rejected\n"); - return false; + if (handleReject() == RejectAndGiveUp) + return false; } break; } case GIM_CheckOpcode: { - int64_t InsnID = *Command++; - int64_t Expected = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Expected = MatchTable[CurrentIdx++]; unsigned Opcode = State.MIs[InsnID]->getOpcode(); - DEBUG(dbgs() << "GIM_CheckOpcode(MIs[" << InsnID << "], ExpectedOpcode=" - << Expected << ") // Got=" << Opcode << "\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID + << "], ExpectedOpcode=" << Expected + << ") // Got=" << Opcode << "\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (Opcode != Expected) - return false; + if (Opcode != Expected) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckNumOperands: { - int64_t InsnID = *Command++; - int64_t Expected = *Command++; - DEBUG(dbgs() << "GIM_CheckNumOperands(MIs[" << InsnID - << "], Expected=" << Expected << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Expected = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckNumOperands(MIs[" + << InsnID << "], Expected=" << Expected << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (State.MIs[InsnID]->getNumOperands() != Expected) - return false; + if (State.MIs[InsnID]->getNumOperands() != Expected) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckI64ImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = 
MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckI64ImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT && + "Expected G_CONSTANT"); + assert(Predicate > GIPFP_I64_Invalid && "Expected a valid predicate"); + int64_t Value = 0; + if (State.MIs[InsnID]->getOperand(1).isCImm()) + Value = State.MIs[InsnID]->getOperand(1).getCImm()->getSExtValue(); + else if (State.MIs[InsnID]->getOperand(1).isImm()) + Value = State.MIs[InsnID]->getOperand(1).getImm(); + else + llvm_unreachable("Expected Imm or CImm operand"); + + if (!MatcherInfo.I64ImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckAPIntImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() && "Expected G_CONSTANT"); + assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate"); + APInt Value; + if (State.MIs[InsnID]->getOperand(1).isCImm()) + Value = State.MIs[InsnID]->getOperand(1).getCImm()->getValue(); + else + llvm_unreachable("Expected Imm or CImm operand"); + + if (!MatcherInfo.APIntImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAPFloatImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckAPFloatImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && + "Expected G_FCONSTANT"); + assert(State.MIs[InsnID]->getOperand(1).isFPImm() && "Expected FPImm operand"); + assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate"); + APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF(); + + if (!MatcherInfo.APFloatImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrdering: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (MMO->getOrdering() != Ordering) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrderingOrStrongerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + 
assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (!isAtLeastOrStrongerThan(MMO->getOrdering(), Ordering)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrderingWeakerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingWeakerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (!isStrongerThan(Ordering, MMO->getOrdering())) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckType: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t TypeID = *Command++; - DEBUG(dbgs() << "GIM_CheckType(MIs[" << InsnID << "]->getOperand(" - << OpIdx << "), TypeID=" << TypeID << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t TypeID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckType(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != - MatcherInfo.TypeObjects[TypeID]) - return false; + MatcherInfo.TypeObjects[TypeID]) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckPointerToAny: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SizeInBits = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), SizeInBits=" << SizeInBits << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // iPTR must be looked up in the target. 
+ if (SizeInBits == 0) { + MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); + SizeInBits = MF->getDataLayout().getPointerSizeInBits(0); + } + + assert(SizeInBits != 0 && "Pointer size must be known"); + + const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } case GIM_CheckRegBankForClass: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RCEnum = *Command++; - DEBUG(dbgs() << "GIM_CheckRegBankForClass(MIs[" << InsnID - << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum - << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RCEnum = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckRegBankForClass(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), RCEnum=" << RCEnum << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != - RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, TRI)) - return false; + RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, + TRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckComplexPattern: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RendererID = *Command++; - int64_t ComplexPredicateID = *Command++; - DEBUG(dbgs() << "State.Renderers[" << RendererID - << "] = GIM_CheckComplexPattern(MIs[" << InsnID - << "]->getOperand(" << OpIdx - << "), ComplexPredicateID=" << ComplexPredicateID << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + int64_t ComplexPredicateID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": State.Renderers[" << RendererID + << "] = GIM_CheckComplexPattern(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), ComplexPredicateID=" << ComplexPredicateID + << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. 
- if (!(State.Renderers[RendererID] = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( - State.MIs[InsnID]->getOperand(OpIdx)))) - return false; + ComplexRendererFns Renderer = + (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + State.MIs[InsnID]->getOperand(OpIdx)); + if (Renderer.hasValue()) + State.Renderers[RendererID] = Renderer.getValue(); + else + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckConstantInt: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckConstantInt(MIs[" << InsnID << "]->getOperand(" - << OpIdx << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckConstantInt(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, MRI)) - return false; + + // isOperandImmEqual() will sign-extend to 64-bits, so should we. + LLT Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + Value = SignExtend64(Value, Ty.getSizeInBits()); + + if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, + MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckLiteralInt: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckLiteralInt(MIs[" << InsnID << "]->getOperand(" << OpIdx - << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckLiteralInt(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isCImm() || !OM.getCImm()->equalsInt(Value)) - return false; + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isCImm() || !MO.getCImm()->equalsInt(Value)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckIntrinsicID: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckIntrinsicID(MIs[" << InsnID << "]->getOperand(" << OpIdx - << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIntrinsicID(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isIntrinsicID() || OM.getIntrinsicID() != Value) - return false; + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isIntrinsicID() || MO.getIntrinsicID() != Value) + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckIsMBB: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - DEBUG(dbgs() << "GIM_CheckIsMBB(MIs[" << InsnID << 
"]->getOperand(" - << OpIdx << "))\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsMBB(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "))\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) - return false; + if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } case GIM_CheckIsSafeToFold: { - int64_t InsnID = *Command++; - DEBUG(dbgs() << "GIM_CheckIsSafeToFold(MIs[" << InsnID << "])\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsSafeToFold(MIs[" + << InsnID << "])\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!isObviouslySafeToFold(*State.MIs[InsnID])) - return false; + if (!isObviouslySafeToFold(*State.MIs[InsnID], *State.MIs[0])) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckIsSameOperand: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t OtherInsnID = MatchTable[CurrentIdx++]; + int64_t OtherOpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsSameOperand(MIs[" + << InsnID << "][" << OpIdx << "], MIs[" + << OtherInsnID << "][" << OtherOpIdx << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[OtherInsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isIdenticalTo( + State.MIs[OtherInsnID]->getOperand(OtherOpIdx))) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_Reject: + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_Reject"); + if (handleReject() == RejectAndGiveUp) + return false; + break; case GIR_MutateOpcode: { - int64_t OldInsnID = *Command++; - int64_t NewInsnID = *Command++; - int64_t NewOpcode = *Command++; - assert((size_t)NewInsnID == OutMIs.size() && - "Expected to store MIs in order"); - OutMIs.push_back( - MachineInstrBuilder(*State.MIs[OldInsnID]->getParent()->getParent(), - State.MIs[OldInsnID])); + int64_t OldInsnID = MatchTable[CurrentIdx++]; + uint64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t NewOpcode = MatchTable[CurrentIdx++]; + if (NewInsnID >= OutMIs.size()) + OutMIs.resize(NewInsnID + 1); + + OutMIs[NewInsnID] = MachineInstrBuilder(*State.MIs[OldInsnID]->getMF(), + State.MIs[OldInsnID]); OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); - DEBUG(dbgs() << "GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" - << OldInsnID << "], " << NewOpcode << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_MutateOpcode(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << NewOpcode << ")\n"); break; } + case GIR_BuildMI: { - int64_t InsnID = *Command++; - int64_t Opcode = *Command++; - assert((size_t)InsnID == OutMIs.size() && - "Expected to store MIs in order"); - (void)InsnID; - OutMIs.push_back(BuildMI(*State.MIs[0]->getParent(), State.MIs[0], - State.MIs[0]->getDebugLoc(), TII.get(Opcode))); - DEBUG(dbgs() << "GIR_BuildMI(OutMIs[" << InsnID << "], " << Opcode - << ")\n"); + uint64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t Opcode = MatchTable[CurrentIdx++]; + if 
(NewInsnID >= OutMIs.size()) + OutMIs.resize(NewInsnID + 1); + + OutMIs[NewInsnID] = BuildMI(*State.MIs[0]->getParent(), State.MIs[0], + State.MIs[0]->getDebugLoc(), TII.get(Opcode)); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_BuildMI(OutMIs[" + << NewInsnID << "], " << Opcode << ")\n"); break; } case GIR_Copy: { - int64_t NewInsnID = *Command++; - int64_t OldInsnID = *Command++; - int64_t OpIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(OpIdx)); - DEBUG(dbgs() << "GIR_Copy(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID - << "], " << OpIdx << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIR_Copy(OutMIs[" << NewInsnID + << "], MIs[" << OldInsnID << "], " << OpIdx << ")\n"); break; } + + case GIR_CopyOrAddZeroReg: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t ZeroReg = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + MachineOperand &MO = State.MIs[OldInsnID]->getOperand(OpIdx); + if (isOperandImmEqual(MO, 0, MRI)) + OutMIs[NewInsnID].addReg(ZeroReg); + else + OutMIs[NewInsnID].add(MO); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyOrAddZeroReg(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << OpIdx << ", " << ZeroReg << ")\n"); + break; + } + case GIR_CopySubReg: { - int64_t NewInsnID = *Command++; - int64_t OldInsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t SubRegIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SubRegIdx = MatchTable[CurrentIdx++]; assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); OutMIs[NewInsnID].addReg(State.MIs[OldInsnID]->getOperand(OpIdx).getReg(), 0, SubRegIdx); - DEBUG(dbgs() << "GIR_CopySubReg(OutMIs[" << NewInsnID << "], MIs[" - << OldInsnID << "], " << OpIdx << ", " << SubRegIdx - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopySubReg(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << OpIdx << ", " << SubRegIdx << ")\n"); break; } + case GIR_AddImplicitDef: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addDef(RegNum, RegState::Implicit); - DEBUG(dbgs() << "GIR_AddImplicitDef(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImplicitDef(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); break; } + case GIR_AddImplicitUse: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addUse(RegNum, RegState::Implicit); - DEBUG(dbgs() << "GIR_AddImplicitUse(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + 
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImplicitUse(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); break; } + case GIR_AddRegister: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addReg(RegNum); - DEBUG(dbgs() << "GIR_AddRegister(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); + break; + } + + case GIR_AddTempRegister: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t TempRegID = MatchTable[CurrentIdx++]; + uint64_t TempRegFlags = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddTempRegister(OutMIs[" + << InsnID << "], TempRegisters[" << TempRegID + << "], " << TempRegFlags << ")\n"); break; } + case GIR_AddImm: { - int64_t InsnID = *Command++; - int64_t Imm = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Imm = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addImm(Imm); - DEBUG(dbgs() << "GIR_AddImm(OutMIs[" << InsnID << "], " << Imm << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImm(OutMIs[" << InsnID + << "], " << Imm << ")\n"); break; } + case GIR_ComplexRenderer: { - int64_t InsnID = *Command++; - int64_t RendererID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + for (const auto &RenderOpFn : State.Renderers[RendererID]) + RenderOpFn(OutMIs[InsnID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_ComplexRenderer(OutMIs[" + << InsnID << "], " << RendererID << ")\n"); + break; + } + case GIR_ComplexSubOperandRenderer: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + int64_t RenderOpID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - State.Renderers[RendererID](OutMIs[InsnID]); - DEBUG(dbgs() << "GIR_ComplexRenderer(OutMIs[" << InsnID << "], " - << RendererID << ")\n"); + State.Renderers[RendererID][RenderOpID](OutMIs[InsnID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIR_ComplexSubOperandRenderer(OutMIs[" + << InsnID << "], " << RendererID << ", " + << RenderOpID << ")\n"); + break; + } + + case GIR_CopyConstantAsSImm: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + if (State.MIs[OldInsnID]->getOperand(1).isCImm()) { + OutMIs[NewInsnID].addImm( + State.MIs[OldInsnID]->getOperand(1).getCImm()->getSExtValue()); + } else if (State.MIs[OldInsnID]->getOperand(1).isImm()) + OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(1)); + else + llvm_unreachable("Expected Imm or CImm operand"); + 
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyConstantAsSImm(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "])\n"); break; } case GIR_ConstrainOperandRC: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RCEnum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RCEnum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx, *TRI.getRegClass(RCEnum), TII, TRI, RBI); - DEBUG(dbgs() << "GIR_ConstrainOperandRC(OutMIs[" << InsnID << "], " - << OpIdx << ", " << RCEnum << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_ConstrainOperandRC(OutMIs[" + << InsnID << "], " << OpIdx << ", " << RCEnum + << ")\n"); break; } + case GIR_ConstrainSelectedInstOperands: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); constrainSelectedInstRegOperands(*OutMIs[InsnID].getInstr(), TII, TRI, RBI); - DEBUG(dbgs() << "GIR_ConstrainSelectedInstOperands(OutMIs[" << InsnID - << "])\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIR_ConstrainSelectedInstOperands(OutMIs[" + << InsnID << "])\n"); break; } + case GIR_MergeMemOperands: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - for (const auto *FromMI : State.MIs) - for (const auto &MMO : FromMI->memoperands()) + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_MergeMemOperands(OutMIs[" + << InsnID << "]"); + int64_t MergeInsnID = GIU_MergeMemOperands_EndOfList; + while ((MergeInsnID = MatchTable[CurrentIdx++]) != + GIU_MergeMemOperands_EndOfList) { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << ", MIs[" << MergeInsnID << "]"); + for (const auto &MMO : State.MIs[MergeInsnID]->memoperands()) OutMIs[InsnID].addMemOperand(MMO); - DEBUG(dbgs() << "GIR_MergeMemOperands(OutMIs[" << InsnID << "])\n"); + } + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << ")\n"); break; } + case GIR_EraseFromParent: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(State.MIs[InsnID] && "Attempted to erase an undefined instruction"); State.MIs[InsnID]->eraseFromParent(); - DEBUG(dbgs() << "GIR_EraseFromParent(MIs[" << InsnID << "])\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_EraseFromParent(MIs[" + << InsnID << "])\n"); + break; + } + + case GIR_MakeTempReg: { + int64_t TempRegID = MatchTable[CurrentIdx++]; + int64_t TypeID = MatchTable[CurrentIdx++]; + + State.TempRegisters[TempRegID] = + MRI.createGenericVirtualRegister(MatcherInfo.TypeObjects[TypeID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": TempRegs[" << TempRegID + << "] = GIR_MakeTempReg(" << TypeID << ")\n"); + break; + } + + case GIR_Coverage: { + int64_t RuleID = MatchTable[CurrentIdx++]; + CoverageInfo.setCovered(RuleID); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIR_Coverage(" << RuleID << ")"); break; } case GIR_Done: - DEBUG(dbgs() << "GIR_Done"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_Done"); return true; default: diff --git 
a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h new file mode 100644 index 0000000000000..e7945ff5bf4f2 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -0,0 +1,287 @@ +//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --===========// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file contains some helper functions which try to cleanup artifacts +// such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make +// the types match. This file also contains some combines of merges that happens +// at the end of the legalization. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "legalizer" + +namespace llvm { +class LegalizationArtifactCombiner { + MachineIRBuilder &Builder; + MachineRegisterInfo &MRI; + const LegalizerInfo &LI; + +public: + LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, + const LegalizerInfo &LI) + : Builder(B), MRI(MRI), LI(LI) {} + + bool tryCombineAnyExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + if (MI.getOpcode() != TargetOpcode::G_ANYEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + DEBUG(dbgs() << ".. Combine MI: " << MI;); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = DefMI->getOperand(1).getReg(); + Builder.setInstr(MI); + // We get a copy/trunc/extend depending on the sizes + Builder.buildAnyExtOrTrunc(DstReg, SrcReg); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + bool tryCombineZExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_ZEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_AND, DstTy) || + isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + return false; + DEBUG(dbgs() << ".. 
Combine MI: " << MI;); + Builder.setInstr(MI); + unsigned ZExtSrc = MI.getOperand(1).getReg(); + LLT ZExtSrcTy = MRI.getType(ZExtSrc); + APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits()); + auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue()); + unsigned TruncSrc = DefMI->getOperand(1).getReg(); + // We get a copy/trunc/extend depending on the sizes + auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); + Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + bool tryCombineSExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_SEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_SHL, DstTy) || + isInstUnsupported(TargetOpcode::G_ASHR, DstTy) || + isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + return false; + DEBUG(dbgs() << ".. Combine MI: " << MI;); + Builder.setInstr(MI); + unsigned SExtSrc = MI.getOperand(1).getReg(); + LLT SExtSrcTy = MRI.getType(SExtSrc); + unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits(); + auto SizeDiffMIB = Builder.buildConstant(DstTy, SizeDiff); + unsigned TruncSrcReg = DefMI->getOperand(1).getReg(); + // We get a copy/trunc/extend depending on the sizes + auto SrcCopyExtOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrcReg); + auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy, + SrcCopyExtOrTrunc, SizeDiffMIB); + Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + /// Try to fold sb = EXTEND (G_IMPLICIT_DEF sa) -> sb = G_IMPLICIT_DEF + bool tryFoldImplicitDef(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT && + Opcode != TargetOpcode::G_SEXT) + return false; + + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_IMPLICIT_DEF, DstTy)) + return false; + DEBUG(dbgs() << ".. 
Combine EXT(IMPLICIT_DEF) " << MI;); + Builder.setInstr(MI); + Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return false; + } + + bool tryCombineMerges(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + unsigned NumDefs = MI.getNumOperands() - 1; + unsigned SrcReg = MI.getOperand(NumDefs).getReg(); + MachineInstr *MergeI = MRI.getVRegDef(SrcReg); + if (!MergeI || (MergeI->getOpcode() != TargetOpcode::G_MERGE_VALUES)) + return false; + + const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; + + if (NumMergeRegs < NumDefs) { + if (NumDefs % NumMergeRegs != 0) + return false; + + Builder.setInstr(MI); + // Transform to UNMERGEs, for example + // %1 = G_MERGE_VALUES %4, %5 + // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 + // to + // %9, %10 = G_UNMERGE_VALUES %4 + // %11, %12 = G_UNMERGE_VALUES %5 + + const unsigned NewNumDefs = NumDefs / NumMergeRegs; + for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { + SmallVector<unsigned, 2> DstRegs; + for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; + ++j, ++DefIdx) + DstRegs.push_back(MI.getOperand(DefIdx).getReg()); + + Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + } + + } else if (NumMergeRegs > NumDefs) { + if (NumMergeRegs % NumDefs != 0) + return false; + + Builder.setInstr(MI); + // Transform to MERGEs + // %6 = G_MERGE_VALUES %17, %18, %19, %20 + // %7, %8 = G_UNMERGE_VALUES %6 + // to + // %7 = G_MERGE_VALUES %17, %18 + // %8 = G_MERGE_VALUES %19, %20 + + const unsigned NumRegs = NumMergeRegs / NumDefs; + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + SmallVector<unsigned, 2> Regs; + for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; + ++j, ++Idx) + Regs.push_back(MergeI->getOperand(Idx).getReg()); + + Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + } + + } else { + // FIXME: is a COPY appropriate if the types mismatch? We know both + // registers are allocatable by now. + if (MRI.getType(MI.getOperand(0).getReg()) != + MRI.getType(MergeI->getOperand(1).getReg())) + return false; + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), + MergeI->getOperand(Idx + 1).getReg()); + } + + markInstAndDefDead(MI, *MergeI, DeadInsts); + return true; + } + + /// Try to combine away MI. + /// Returns true if it combined away the MI. + /// Adds instructions that are dead as a result of the combine + /// into DeadInsts, which can include MI. + bool tryCombineInstruction(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + switch (MI.getOpcode()) { + default: + return false; + case TargetOpcode::G_ANYEXT: + return tryCombineAnyExt(MI, DeadInsts); + case TargetOpcode::G_ZEXT: + return tryCombineZExt(MI, DeadInsts); + case TargetOpcode::G_SEXT: + return tryCombineSExt(MI, DeadInsts); + case TargetOpcode::G_UNMERGE_VALUES: + return tryCombineMerges(MI, DeadInsts); + case TargetOpcode::G_TRUNC: { + bool Changed = false; + for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg())) + Changed |= tryCombineInstruction(Use, DeadInsts); + return Changed; + } + } + } + +private: + /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be + /// dead due to MI being killed, then mark DefMI as dead too. 
+ /// Some of the combines (extends(trunc)), try to walk through redundant + /// copies in between the extends and the truncs, and this attempts to collect + /// the in between copies if they're dead. + void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + DeadInsts.push_back(&MI); + + // Collect all the copy instructions that are made dead, due to deleting + // this instruction. Collect all of them until the Trunc(DefMI). + // Eg, + // %1(s1) = G_TRUNC %0(s32) + // %2(s1) = COPY %1(s1) + // %3(s1) = COPY %2(s1) + // %4(s32) = G_ANYEXT %3(s1) + // In this case, we would have replaced %4 with a copy of %0, + // and as a result, %3, %2, %1 are dead. + MachineInstr *PrevMI = &MI; + while (PrevMI != &DefMI) { + // If we're dealing with G_UNMERGE_VALUES, tryCombineMerges doesn't really try + // to fold copies in between and we can ignore them here. + if (PrevMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) + break; + unsigned PrevRegSrc = PrevMI->getOperand(1).getReg(); + MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); + if (MRI.hasOneUse(PrevRegSrc)) { + if (TmpDef != &DefMI) { + assert(TmpDef->getOpcode() == TargetOpcode::COPY && + "Expecting copy here"); + DeadInsts.push_back(TmpDef); + } + } else + break; + PrevMI = TmpDef; + } + if ((PrevMI == &DefMI || + DefMI.getOpcode() == TargetOpcode::G_MERGE_VALUES) && + MRI.hasOneUse(DefMI.getOperand(0).getReg())) + DeadInsts.push_back(&DefMI); + } + + /// Checks if the target legalizer info has specified anything about the + /// instruction, or if unsupported. + bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const { + auto Action = LI.getAction({Opcode, 0, DstTy}); + return Action.first == LegalizerInfo::LegalizeAction::Unsupported || + Action.first == LegalizerInfo::LegalizeAction::NotFound; + } +}; + +} // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/Legalizer.h b/include/llvm/CodeGen/GlobalISel/Legalizer.h index 9b9b8b563a30e..8284ab6dac65e 100644 --- a/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -58,9 +58,6 @@ public: bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII); - bool combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, MachineIRBuilder &MIRBuilder); - bool runOnMachineFunction(MachineFunction &MF) override; }; } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 1fd45b52e3ac7..8bd8a9dcd0e24 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -89,6 +89,9 @@ public: /// functions MachineIRBuilder MIRBuilder; + /// Expose LegalizerInfo so the clients can re-use. 
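Taken together with the new LegalizerHelper accessor below, a client that owns a LegalizerHelper can feed the same LegalizerInfo into the artifact combiner and erase whatever it marks dead. A sketch of that driving code, where the surrounding function is illustrative rather than the actual Legalizer pass:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static void combineArtifactsAround(MachineFunction &MF, MachineInstr &MI) {
  LegalizerHelper Helper(MF);
  LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(),
                                           Helper.getLegalizerInfo());
  SmallVector<MachineInstr *, 4> DeadInsts;
  if (ArtCombiner.tryCombineInstruction(MI, DeadInsts))
    // DeadInsts may include MI itself; everything in it is safe to erase now.
    for (MachineInstr *Dead : DeadInsts)
      Dead->eraseFromParent();
}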
+ const LegalizerInfo &getLegalizerInfo() const { return LI; } + private: /// Helper function to split a wide generic register into bitwise blocks with diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index c259e93fdd366..b6735d538b37c 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -20,11 +20,12 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Target/TargetOpcodes.h" -#include <cstdint> #include <cassert> +#include <cstdint> #include <tuple> +#include <unordered_map> #include <utility> namespace llvm { @@ -120,27 +121,144 @@ public: } } + typedef std::pair<uint16_t, LegalizeAction> SizeAndAction; + typedef std::vector<SizeAndAction> SizeAndActionsVec; + using SizeChangeStrategy = + std::function<SizeAndActionsVec(const SizeAndActionsVec &v)>; + /// More friendly way to set an action for common types that have an LLT /// representation. + /// The LegalizeAction must be one for which NeedsLegalizingToDifferentSize + /// returns false. void setAction(const InstrAspect &Aspect, LegalizeAction Action) { + assert(!needsLegalizingToDifferentSize(Action)); TablesInitialized = false; - unsigned Opcode = Aspect.Opcode - FirstOp; - if (Actions[Opcode].size() <= Aspect.Idx) - Actions[Opcode].resize(Aspect.Idx + 1); - Actions[Aspect.Opcode - FirstOp][Aspect.Idx][Aspect.Type] = Action; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + if (SpecifiedActions[OpcodeIdx].size() <= Aspect.Idx) + SpecifiedActions[OpcodeIdx].resize(Aspect.Idx + 1); + SpecifiedActions[OpcodeIdx][Aspect.Idx][Aspect.Type] = Action; } - /// If an operation on a given vector type (say <M x iN>) isn't explicitly - /// specified, we proceed in 2 stages. First we legalize the underlying scalar - /// (so that there's at least one legal vector with that scalar), then we - /// adjust the number of elements in the vector so that it is legal. The - /// desired action in the first step is controlled by this function. - void setScalarInVectorAction(unsigned Opcode, LLT ScalarTy, - LegalizeAction Action) { - assert(!ScalarTy.isVector()); - ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action; + /// The setAction calls record the non-size-changing legalization actions + /// to take on specificly-sized types. The SizeChangeStrategy defines what + /// to do when the size of the type needs to be changed to reach a legally + /// sized type (i.e., one that was defined through a setAction call). + /// e.g. + /// setAction ({G_ADD, 0, LLT::scalar(32)}, Legal); + /// setLegalizeScalarToDifferentSizeStrategy( + /// G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + /// will end up defining getAction({G_ADD, 0, T}) to return the following + /// actions for different scalar types T: + /// LLT::scalar(1)..LLT::scalar(31): {WidenScalar, 0, LLT::scalar(32)} + /// LLT::scalar(32): {Legal, 0, LLT::scalar(32)} + /// LLT::scalar(33)..: {NarrowScalar, 0, LLT::scalar(32)} + /// + /// If no SizeChangeAction gets defined, through this function, + /// the default is unsupportedForDifferentSizes. 
+ void setLegalizeScalarToDifferentSizeStrategy(const unsigned Opcode, + const unsigned TypeIdx, + SizeChangeStrategy S) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (ScalarSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx) + ScalarSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1); + ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] = S; + } + + /// See also setLegalizeScalarToDifferentSizeStrategy. + /// This function allows to set the SizeChangeStrategy for vector elements. + void setLegalizeVectorElementToDifferentSizeStrategy(const unsigned Opcode, + const unsigned TypeIdx, + SizeChangeStrategy S) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (VectorElementSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx) + VectorElementSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1); + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] = S; + } + + /// A SizeChangeStrategy for the common case where legalization for a + /// particular operation consists of only supporting a specific set of type + /// sizes. E.g. + /// setAction ({G_DIV, 0, LLT::scalar(32)}, Legal); + /// setAction ({G_DIV, 0, LLT::scalar(64)}, Legal); + /// setLegalizeScalarToDifferentSizeStrategy( + /// G_DIV, 0, unsupportedForDifferentSizes); + /// will result in getAction({G_DIV, 0, T}) to return Legal for s32 and s64, + /// and Unsupported for all other scalar types T. + static SizeAndActionsVec + unsupportedForDifferentSizes(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, Unsupported, + Unsupported); } + /// A SizeChangeStrategy for the common case where legalization for a + /// particular operation consists of widening the type to a large legal type, + /// unless there is no such type and then instead it should be narrowed to the + /// largest legal type. + static SizeAndActionsVec + widenToLargerTypesAndNarrowToLargest(const SizeAndActionsVec &v) { + assert(v.size() > 0 && + "At least one size that can be legalized towards is needed" + " for this SizeChangeStrategy"); + return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, + NarrowScalar); + } + + static SizeAndActionsVec + widenToLargerTypesUnsupportedOtherwise(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, + Unsupported); + } + + static SizeAndActionsVec + narrowToSmallerAndUnsupportedIfTooSmall(const SizeAndActionsVec &v) { + return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, + Unsupported); + } + + static SizeAndActionsVec + narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) { + assert(v.size() > 0 && + "At least one size that can be legalized towards is needed" + " for this SizeChangeStrategy"); + return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, + WidenScalar); + } + + /// A SizeChangeStrategy for the common case where legalization for a + /// particular vector operation consists of having more elements in the + /// vector, to a type that is legal. Unless there is no such type and then + /// instead it should be legalized towards the widest vector that's still + /// legal. E.g. 
+ /// setAction({G_ADD, LLT::vector(8, 8)}, Legal); + /// setAction({G_ADD, LLT::vector(16, 8)}, Legal); + /// setAction({G_ADD, LLT::vector(2, 32)}, Legal); + /// setAction({G_ADD, LLT::vector(4, 32)}, Legal); + /// setLegalizeVectorElementToDifferentSizeStrategy( + /// G_ADD, 0, moreToWiderTypesAndLessToWidest); + /// will result in the following getAction results: + /// * getAction({G_ADD, LLT::vector(8,8)}) returns + /// (Legal, vector(8,8)). + /// * getAction({G_ADD, LLT::vector(9,8)}) returns + /// (MoreElements, vector(16,8)). + /// * getAction({G_ADD, LLT::vector(8,32)}) returns + /// (FewerElements, vector(4,32)). + static SizeAndActionsVec + moreToWiderTypesAndLessToWidest(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, MoreElements, + FewerElements); + } + + /// Helper function to implement many typical SizeChangeStrategy functions. + static SizeAndActionsVec + increaseToLargerTypesAndDecreaseToLargest(const SizeAndActionsVec &v, + LegalizeAction IncreaseAction, + LegalizeAction DecreaseAction); + /// Helper function to implement many typical SizeChangeStrategy functions. + static SizeAndActionsVec + decreaseToSmallerTypesAndIncreaseToSmallest(const SizeAndActionsVec &v, + LegalizeAction DecreaseAction, + LegalizeAction IncreaseAction); + /// Determine what action should be taken to legalize the given generic /// instruction opcode, type-index and type. Requires computeTables to have /// been called. @@ -158,55 +276,6 @@ public: std::tuple<LegalizeAction, unsigned, LLT> getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; - /// Iterate the given function (typically something like doubling the width) - /// on Ty until we find a legal type for this operation. - Optional<LLT> findLegalizableSize(const InstrAspect &Aspect, - function_ref<LLT(LLT)> NextType) const { - LegalizeAction Action; - const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; - LLT Ty = Aspect.Type; - do { - Ty = NextType(Ty); - auto ActionIt = Map.find(Ty); - if (ActionIt == Map.end()) { - auto DefaultIt = DefaultActions.find(Aspect.Opcode); - if (DefaultIt == DefaultActions.end()) - return None; - Action = DefaultIt->second; - } else - Action = ActionIt->second; - } while (needsLegalizingToDifferentSize(Action)); - return Ty; - } - - /// Find what type it's actually OK to perform the given operation on, given - /// the general approach we've decided to take. - Optional<LLT> findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const; - - std::pair<LegalizeAction, LLT> findLegalAction(const InstrAspect &Aspect, - LegalizeAction Action) const { - auto LegalType = findLegalType(Aspect, Action); - if (!LegalType) - return std::make_pair(LegalizeAction::Unsupported, LLT()); - return std::make_pair(Action, *LegalType); - } - - /// Find the specified \p Aspect in the primary (explicitly set) Actions - /// table. Returns either the action the target requested or NotFound if there - /// was no setAction call. 
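Pulling the pieces above together, a sketch of how a target might combine setAction with these size-change strategies in its derived LegalizerInfo constructor. MyTargetLegalizerInfo is a hypothetical target class, and the sketch assumes computeTables() (referenced above) is called once all actions are recorded.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

using namespace llvm;

struct MyTargetLegalizerInfo : public LegalizerInfo {
  MyTargetLegalizerInfo() {
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);

    // Natively supported sizes for addition; any other scalar size is
    // widened to the next larger legal size, or narrowed to 64 bits.
    setAction({TargetOpcode::G_ADD, 0, S32}, Legal);
    setAction({TargetOpcode::G_ADD, 0, S64}, Legal);
    setLegalizeScalarToDifferentSizeStrategy(
        TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);

    // Division is only available at exactly 32 and 64 bits; other scalar
    // sizes are reported as Unsupported (also the default strategy).
    setAction({TargetOpcode::G_SDIV, 0, S32}, Legal);
    setAction({TargetOpcode::G_SDIV, 0, S64}, Legal);
    setLegalizeScalarToDifferentSizeStrategy(
        TargetOpcode::G_SDIV, 0, unsupportedForDifferentSizes);

    computeTables();
  }
};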
- LegalizeAction findInActions(const InstrAspect &Aspect) const { - if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) - return NotFound; - if (Aspect.Idx >= Actions[Aspect.Opcode - FirstOp].size()) - return NotFound; - const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; - auto ActionIt = Map.find(Aspect.Type); - if (ActionIt == Map.end()) - return NotFound; - - return ActionIt->second; - } - bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; virtual bool legalizeCustom(MachineInstr &MI, @@ -214,20 +283,181 @@ public: MachineIRBuilder &MIRBuilder) const; private: - static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START; - static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; + /// The SizeAndActionsVec is a representation mapping between all natural + /// numbers and an Action. The natural number represents the bit size of + /// the InstrAspect. For example, for a target with native support for 32-bit + /// and 64-bit additions, you'd express that as: + /// setScalarAction(G_ADD, 0, + /// {{1, WidenScalar}, // bit sizes [ 1, 31[ + /// {32, Legal}, // bit sizes [32, 33[ + /// {33, WidenScalar}, // bit sizes [33, 64[ + /// {64, Legal}, // bit sizes [64, 65[ + /// {65, NarrowScalar} // bit sizes [65, +inf[ + /// }); + /// It may be that only 64-bit pointers are supported on your target: + /// setPointerAction(G_GEP, 0, LLT:pointer(1), + /// {{1, Unsupported}, // bit sizes [ 1, 63[ + /// {64, Legal}, // bit sizes [64, 65[ + /// {65, Unsupported}, // bit sizes [65, +inf[ + /// }); + void setScalarAction(const unsigned Opcode, const unsigned TypeIndex, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + SmallVector<SizeAndActionsVec, 1> &Actions = ScalarActions[OpcodeIdx]; + setActions(TypeIndex, Actions, SizeAndActions); + } + void setPointerAction(const unsigned Opcode, const unsigned TypeIndex, + const unsigned AddressSpace, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace) == + AddrSpace2PointerActions[OpcodeIdx].end()) + AddrSpace2PointerActions[OpcodeIdx][AddressSpace] = {{}}; + SmallVector<SizeAndActionsVec, 1> &Actions = + AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace)->second; + setActions(TypeIndex, Actions, SizeAndActions); + } - using TypeMap = DenseMap<LLT, LegalizeAction>; - using SIVActionMap = DenseMap<std::pair<unsigned, LLT>, LegalizeAction>; + /// If an operation on a given vector type (say <M x iN>) isn't explicitly + /// specified, we proceed in 2 stages. First we legalize the underlying scalar + /// (so that there's at least one legal vector with that scalar), then we + /// adjust the number of elements in the vector so that it is legal. The + /// desired action in the first step is controlled by this function. + void setScalarInVectorAction(const unsigned Opcode, const unsigned TypeIndex, + const SizeAndActionsVec &SizeAndActions) { + unsigned OpcodeIdx = Opcode - FirstOp; + SmallVector<SizeAndActionsVec, 1> &Actions = + ScalarInVectorActions[OpcodeIdx]; + setActions(TypeIndex, Actions, SizeAndActions); + } + + /// See also setScalarInVectorAction. + /// This function let's you specify the number of elements in a vector that + /// are legal for a legal element size. 
+ void setVectorNumElementAction(const unsigned Opcode, + const unsigned TypeIndex, + const unsigned ElementSize, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (NumElements2Actions[OpcodeIdx].find(ElementSize) == + NumElements2Actions[OpcodeIdx].end()) + NumElements2Actions[OpcodeIdx][ElementSize] = {{}}; + SmallVector<SizeAndActionsVec, 1> &Actions = + NumElements2Actions[OpcodeIdx].find(ElementSize)->second; + setActions(TypeIndex, Actions, SizeAndActions); + } - SmallVector<TypeMap, 1> Actions[LastOp - FirstOp + 1]; - SIVActionMap ScalarInVectorActions; - DenseMap<std::pair<unsigned, LLT>, uint16_t> MaxLegalVectorElts; - DenseMap<unsigned, LegalizeAction> DefaultActions; + /// A partial SizeAndActionsVec potentially doesn't cover all bit sizes, + /// i.e. it's OK if it doesn't start from size 1. + static void checkPartialSizeAndActionsVector(const SizeAndActionsVec& v) { +#ifndef NDEBUG + // The sizes should be in increasing order + int prev_size = -1; + for(auto SizeAndAction: v) { + assert(SizeAndAction.first > prev_size); + prev_size = SizeAndAction.first; + } + // - for every Widen action, there should be a larger bitsize that + // can be legalized towards (e.g. Legal, Lower, Libcall or Custom + // action). + // - for every Narrow action, there should be a smaller bitsize that + // can be legalized towards. + int SmallestNarrowIdx = -1; + int LargestWidenIdx = -1; + int SmallestLegalizableToSameSizeIdx = -1; + int LargestLegalizableToSameSizeIdx = -1; + for(size_t i=0; i<v.size(); ++i) { + switch (v[i].second) { + case FewerElements: + case NarrowScalar: + if (SmallestNarrowIdx == -1) + SmallestNarrowIdx = i; + break; + case WidenScalar: + case MoreElements: + LargestWidenIdx = i; + break; + case Unsupported: + break; + default: + if (SmallestLegalizableToSameSizeIdx == -1) + SmallestLegalizableToSameSizeIdx = i; + LargestLegalizableToSameSizeIdx = i; + } + } + if (SmallestNarrowIdx != -1) { + assert(SmallestLegalizableToSameSizeIdx != -1); + assert(SmallestNarrowIdx > SmallestLegalizableToSameSizeIdx); + } + if (LargestWidenIdx != -1) + assert(LargestWidenIdx < LargestLegalizableToSameSizeIdx); +#endif + } + + /// A full SizeAndActionsVec must cover all bit sizes, i.e. must start with + /// from size 1. + static void checkFullSizeAndActionsVector(const SizeAndActionsVec& v) { +#ifndef NDEBUG + // Data structure invariant: The first bit size must be size 1. + assert(v.size() >= 1); + assert(v[0].first == 1); + checkPartialSizeAndActionsVector(v); +#endif + } + + /// Sets actions for all bit sizes on a particular generic opcode, type + /// index and scalar or pointer type. + void setActions(unsigned TypeIndex, + SmallVector<SizeAndActionsVec, 1> &Actions, + const SizeAndActionsVec &SizeAndActions) { + checkFullSizeAndActionsVector(SizeAndActions); + if (Actions.size() <= TypeIndex) + Actions.resize(TypeIndex + 1); + Actions[TypeIndex] = SizeAndActions; + } + + static SizeAndAction findAction(const SizeAndActionsVec &Vec, + const uint32_t Size); + + /// Returns the next action needed to get the scalar or pointer type closer + /// to being legal + /// E.g. findLegalAction({G_REM, 13}) should return + /// (WidenScalar, 32). After that, findLegalAction({G_REM, 32}) will + /// probably be called, which should return (Lower, 32). 
+ /// This is assuming the setScalarAction on G_REM was something like: + /// setScalarAction(G_REM, 0, + /// {{1, WidenScalar}, // bit sizes [ 1, 31[ + /// {32, Lower}, // bit sizes [32, 33[ + /// {33, NarrowScalar} // bit sizes [65, +inf[ + /// }); + std::pair<LegalizeAction, LLT> + findScalarLegalAction(const InstrAspect &Aspect) const; + + /// Returns the next action needed towards legalizing the vector type. + std::pair<LegalizeAction, LLT> + findVectorLegalAction(const InstrAspect &Aspect) const; + + static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START; + static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; - bool TablesInitialized = false; + // Data structures used temporarily during construction of legality data: + typedef DenseMap<LLT, LegalizeAction> TypeMap; + SmallVector<TypeMap, 1> SpecifiedActions[LastOp - FirstOp + 1]; + SmallVector<SizeChangeStrategy, 1> + ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; + SmallVector<SizeChangeStrategy, 1> + VectorElementSizeChangeStrategies[LastOp - FirstOp + 1]; + bool TablesInitialized; + + // Data structures used by getAction: + SmallVector<SizeAndActionsVec, 1> ScalarActions[LastOp - FirstOp + 1]; + SmallVector<SizeAndActionsVec, 1> ScalarInVectorActions[LastOp - FirstOp + 1]; + std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> + AddrSpace2PointerActions[LastOp - FirstOp + 1]; + std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> + NumElements2Actions[LastOp - FirstOp + 1]; }; -} // end namespace llvm +} // end namespace llvm. #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 85e6fef1f3c26..aa875c11d86fa 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -70,13 +70,33 @@ class MachineIRBuilder { return getMF().getRegInfo().createVirtualRegister(RC); } - unsigned getRegFromArg(unsigned Reg) { return Reg; } + void addUseFromArg(MachineInstrBuilder &MIB, unsigned Reg) { + MIB.addUse(Reg); + } + void addUseFromArg(MachineInstrBuilder &MIB, const MachineInstrBuilder &UseMIB) { + MIB.addUse(UseMIB->getOperand(0).getReg()); + } + + void addUsesFromArgs(MachineInstrBuilder &MIB) { } + template<typename UseArgTy, typename ... UseArgsTy> + void addUsesFromArgs(MachineInstrBuilder &MIB, UseArgTy &&Arg1, UseArgsTy &&... Args) { + addUseFromArg(MIB, Arg1); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + } + unsigned getRegFromArg(unsigned Reg) { return Reg; } unsigned getRegFromArg(const MachineInstrBuilder &MIB) { return MIB->getOperand(0).getReg(); } public: + /// Some constructors for easy use. + MachineIRBuilder() = default; + MachineIRBuilder(MachineFunction &MF) { setMF(MF); } + MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + setInstr(MI); + } + /// Getter for the function we currently build. MachineFunction &getMF() { assert(MF && "MachineFunction is not set"); @@ -146,9 +166,7 @@ public: MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArgsTy &&... 
Args) { auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); - unsigned It[] = {(getRegFromArg(Args))...}; - for (const auto &i : It) - MIB.addUse(i); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); return MIB; } @@ -168,11 +186,12 @@ public: const MDNode *Expr); /// Build and insert a DBG_VALUE instruction expressing the fact that the - /// associated \p Variable lives in memory at \p Reg + \p Offset (suitably - /// modified by \p Expr). - MachineInstrBuilder buildIndirectDbgValue(unsigned Reg, unsigned Offset, + /// associated \p Variable lives in memory at \p Reg (suitably modified by \p + /// Expr). + MachineInstrBuilder buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, const MDNode *Expr); + /// Build and insert a DBG_VALUE instruction expressing the fact that the /// associated \p Variable lives in the stack slot specified by \p FI /// (suitably modified by \p Expr). @@ -181,11 +200,11 @@ public: /// Build and insert a DBG_VALUE instructions specifying that \p Variable is /// given by \p C (suitably modified by \p Expr). - MachineInstrBuilder buildConstDbgValue(const Constant &C, unsigned Offset, + MachineInstrBuilder buildConstDbgValue(const Constant &C, const MDNode *Variable, const MDNode *Expr); - /// Build and insert \p Res<def> = G_FRAME_INDEX \p Idx + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other /// stack-based object. @@ -196,7 +215,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx); - /// Build and insert \p Res<def> = G_GLOBAL_VALUE \p GV + /// Build and insert \p Res = G_GLOBAL_VALUE \p GV /// /// G_GLOBAL_VALUE materializes the address of the specified global /// into \p Res. @@ -208,7 +227,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV); - /// Build and insert \p Res<def> = G_ADD \p Op0, \p Op1 + /// Build and insert \p Res = G_ADD \p Op0, \p Op1 /// /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -226,7 +245,7 @@ public: return buildAdd(Res, (getRegFromArg(UseArgs))...); } - /// Build and insert \p Res<def> = G_SUB \p Op0, \p Op1 + /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -239,7 +258,7 @@ public: MachineInstrBuilder buildSub(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_MUL \p Op0, \p Op1 + /// Build and insert \p Res = G_MUL \p Op0, \p Op1 /// /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -252,7 +271,7 @@ public: MachineInstrBuilder buildMul(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_GEP \p Op0, \p Op1 + /// Build and insert \p Res = G_GEP \p Op0, \p Op1 /// /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. @@ -266,7 +285,7 @@ public: MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1); - /// Materialize and insert \p Res<def> = G_GEP \p Op0, (G_CONSTANT \p Value) + /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) /// /// G_GEP adds \p Value bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. 
If \p Value is zero then no @@ -286,7 +305,7 @@ public: const LLT &ValueTy, uint64_t Value); - /// Build and insert \p Res<def> = G_PTR_MASK \p Op0, \p NumBits + /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits /// /// G_PTR_MASK clears the low bits of a pointer operand without destroying its /// pointer properties. This has the effect of rounding the address *down* to @@ -302,7 +321,7 @@ public: MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits); - /// Build and insert \p Res<def>, \p CarryOut<def> = G_UADDE \p Op0, + /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0, /// \p Op1, \p CarryIn /// /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit @@ -319,7 +338,7 @@ public: MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0, unsigned Op1, unsigned CarryIn); - /// Build and insert \p Res<def> = G_AND \p Op0, \p Op1 + /// Build and insert \p Res = G_AND \p Op0, \p Op1 /// /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p /// Op1. @@ -329,10 +348,14 @@ public: /// with the same (scalar or vector) type). /// /// \return a MachineInstrBuilder for the newly created instruction. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAnd(DstTy &&Dst, UseArgsTy &&... UseArgs) { + return buildAnd(getDestFromArg(Dst), getRegFromArg(UseArgs)...); + } MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_OR \p Op0, \p Op1 + /// Build and insert \p Res = G_OR \p Op0, \p Op1 /// /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p /// Op1. @@ -344,7 +367,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_ANYEXT \p Op0 + /// Build and insert \p Res = G_ANYEXT \p Op0 /// /// G_ANYEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are unspecified @@ -357,9 +380,14 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. + MachineInstrBuilder buildAnyExt(unsigned Res, unsigned Op); + template <typename DstType, typename ArgType> + MachineInstrBuilder buildAnyExt(DstType &&Res, ArgType &&Arg) { + return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg)); + } - /// Build and insert \p Res<def> = G_SEXT \p Op + /// Build and insert \p Res = G_SEXT \p Op /// /// G_SEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are duplicated from the @@ -373,7 +401,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildSExt(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_ZEXT \p Op + /// Build and insert \p Res = G_ZEXT \p Op /// /// G_ZEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are 0. For a vector @@ -387,7 +415,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildZExt(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. 
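A short sketch of how the new templated overloads and the MachineInstr-based constructor compose. It assumes Src0 and Src1 are generic virtual registers strictly narrower than WideTy; buildWidenedAnd is an illustrative helper name, not part of the API.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

static void buildWidenedAnd(MachineInstr &InsertPt, unsigned Src0,
                            unsigned Src1, LLT WideTy) {
  MachineIRBuilder B(InsertPt); // sets both the MF and the insertion point
  MachineRegisterInfo &MRI = B.getMF().getRegInfo();

  unsigned Wide0 = MRI.createGenericVirtualRegister(WideTy);
  unsigned Wide1 = MRI.createGenericVirtualRegister(WideTy);
  unsigned Dst = MRI.createGenericVirtualRegister(WideTy);

  auto Ext0 = B.buildAnyExt(Wide0, Src0);
  auto Ext1 = B.buildAnyExt(Wide1, Src1);
  // The templated buildAnd accepts the MachineInstrBuilders directly; their
  // operand 0 (the def) is used as the source register of the G_AND.
  B.buildAnd(Dst, Ext0, Ext1);
}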
@@ -397,7 +425,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. @@ -407,6 +435,32 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op); + // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or + /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. + /// /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with scalar or vector type. + /// \pre \p Op must be a generic virtual register with scalar or vector type. + /// + /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildAnyExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildAnyExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } + MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op); + + /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p + /// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and + /// \p Op. + /// /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with scalar or vector type. + /// \pre \p Op must be a generic virtual register with scalar or vector type. + /// + /// \return The newly created instruction. + MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, unsigned Res, + unsigned Op); + /// Build and insert an appropriate cast between two registers of equal size. MachineInstrBuilder buildCast(unsigned Dst, unsigned Src); @@ -480,7 +534,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val); - /// Build and insert \p Res<def> = COPY Op + /// Build and insert \p Res = COPY Op /// /// Register-to-register COPY sets \p Res to \p Op. /// @@ -488,8 +542,12 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildCopy(unsigned Res, unsigned Op); + template <typename DstType, typename SrcType> + MachineInstrBuilder buildCopy(DstType &&Res, SrcType &&Src) { + return buildCopy(getDestFromArg(Res), getRegFromArg(Src)); + } - /// Build and insert `Res<def> = G_LOAD Addr, MMO`. + /// Build and insert `Res = G_LOAD Addr, MMO`. /// /// Loads the value stored at \p Addr. Puts the result in \p Res. /// @@ -513,7 +571,7 @@ public: MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO); - /// Build and insert `Res0<def>, ... = G_EXTRACT Src, Idx0`. + /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`. /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res and \p Src must be generic virtual registers. @@ -540,7 +598,7 @@ public: void buildSequence(unsigned Res, ArrayRef<unsigned> Ops, ArrayRef<uint64_t> Indices); - /// Build and insert \p Res<def> = G_MERGE_VALUES \p Op0, ... + /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// /// G_MERGE_VALUES combines the input elements contiguously into a larger /// register. @@ -553,7 +611,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildMerge(unsigned Res, ArrayRef<unsigned> Ops); - /// Build and insert \p Res0<def>, ... = G_UNMERGE_VALUES \p Op + /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op /// /// G_UNMERGE_VALUES splits contiguous bits of the input into multiple /// @@ -581,7 +639,7 @@ public: MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res, bool HasSideEffects); - /// Build and insert \p Res<def> = G_FPTRUNC \p Op + /// Build and insert \p Res = G_FPTRUNC \p Op /// /// G_FPTRUNC converts a floating-point value into one with a smaller type. /// @@ -593,7 +651,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_TRUNC \p Op + /// Build and insert \p Res = G_TRUNC \p Op /// /// G_TRUNC extracts the low bits of a type. For a vector type each element is /// truncated independently before being packed into the destination. @@ -605,6 +663,10 @@ public: /// /// \return The newly created instruction. MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op); + template <typename DstType, typename SrcType> + MachineInstrBuilder buildTrunc(DstType &&Res, SrcType &&Src) { + return buildTrunc(getDestFromArg(Res), getRegFromArg(Src)); + } /// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1 /// @@ -649,7 +711,7 @@ public: MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_INSERT_VECTOR_ELT \p Val, + /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx /// /// \pre setBasicBlock or setMI must have been called. @@ -662,7 +724,7 @@ public: MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val, unsigned Elt, unsigned Idx); - /// Build and insert \p Res<def> = G_EXTRACT_VECTOR_ELT \p Val, \p Idx + /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res must be a generic virtual register with scalar type. @@ -672,6 +734,24 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val, unsigned Idx); + + /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, + /// MMO`. + /// + /// Atomically replace the value at \p Addr with \p NewVal if it is currently + /// \p CmpVal otherwise leaves it unchanged. Puts the original value from \p + /// Addr in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register of scalar type. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, \p CmpVal, and \p NewVal must be generic virtual + /// registers of the same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO); }; } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 60905c7ec226d..02868b220984d 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -407,6 +407,10 @@ protected: mutable DenseMap<unsigned, std::unique_ptr<const InstructionMapping>> MapOfInstructionMappings; + /// Getting the minimal register class of a physreg is expensive. 
+ /// Cache this information as we get it. + mutable DenseMap<unsigned, const TargetRegisterClass *> PhysRegMinimalRCs; + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks /// RegisterBank instances. RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); @@ -427,6 +431,11 @@ protected: return *RegBanks[ID]; } + /// Get the MinimalPhysRegClass for Reg. + /// \pre Reg is a physical register. + const TargetRegisterClass & + getMinimalPhysRegClass(unsigned Reg, const TargetRegisterInfo &TRI) const; + /// Try to get the mapping of \p MI. /// See getInstrMapping for more details on what a mapping represents. /// @@ -699,8 +708,8 @@ public: /// virtual register. /// /// \pre \p Reg != 0 (NoRegister). - static unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI); + unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; /// Check that information hold by this instance make sense for the /// given \p TRI. diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 50ddbeb9432a3..5864c15cc8eb9 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -79,5 +79,11 @@ Optional<int64_t> getConstantVRegVal(unsigned VReg, const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); +/// See if Reg is defined by an single def instruction that is +/// Opcode. Also try to do trivial folding if it's a COPY with +/// same types. Returns null otherwise. +MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg, + const MachineRegisterInfo &MRI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index bc5d2353f63e3..d256849be9afb 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -186,7 +186,8 @@ namespace ISD { /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. /// Given two values of the same integer value type, this produces a value /// twice as big. Like EXTRACT_ELEMENT, this can only be used before - /// legalization. + /// legalization. The lower part of the composite value should be in + /// element 0 and the upper part should be in element 1. BUILD_PAIR, /// MERGE_VALUES - This node takes multiple discrete operands and returns @@ -263,6 +264,7 @@ namespace ISD { /// They are used to limit optimizations while the DAG is being /// optimized. STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM, + STRICT_FMA, /// Constrained versions of libm-equivalent floating point intrinsics. /// These will be lowered to the equivalent non-constrained pseudo-op @@ -637,6 +639,12 @@ namespace ISD { /// take a chain as input and return a chain. EH_LABEL, + /// ANNOTATION_LABEL - Represents a mid basic block label used by + /// annotations. This should remain within the basic block and be ordered + /// with respect to other call instructions, but loads and stores may float + /// past it. + ANNOTATION_LABEL, + /// CATCHPAD - Represents a catchpad instruction. CATCHPAD, @@ -831,7 +839,7 @@ namespace ISD { /// which do not reference a specific memory location should be less than /// this value. Those that do must not be less than this value, and can /// be used with SelectionDAG::getMemIntrinsicNode. 
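A small sketch of the kind of query getOpcodeDef is meant for, e.g. inside a combine that wants to look through same-type copies; isTruncatedValue is an illustrative name, not an existing helper.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static bool isTruncatedValue(unsigned Reg, const MachineRegisterInfo &MRI) {
  // Walks through trivially foldable COPYs and returns the defining
  // instruction only if it is a G_TRUNC.
  return getOpcodeDef(TargetOpcode::G_TRUNC, Reg, MRI) != nullptr;
}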
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+300; + static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400; //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index a404b9b70d3ac..597d684909c16 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -31,26 +31,22 @@ class IntrinsicLowering { public: explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} - /// AddPrototypes - This method, if called, causes all of the prototypes - /// that might be needed by an intrinsic lowering implementation to be - /// inserted into the module specified. + /// Add all of the prototypes that might be needed by an intrinsic lowering + /// implementation to be inserted into the module specified. void AddPrototypes(Module &M); - /// LowerIntrinsicCall - This method replaces a call with the LLVM function - /// which should be used to implement the specified intrinsic function call. + /// Replace a call to the specified intrinsic function. /// If an intrinsic function must be implemented by the code generator /// (such as va_start), this function should print a message and abort. /// /// Otherwise, if an intrinsic function call can be lowered, the code to /// implement it (often a call to a non-intrinsic function) is inserted - /// _after_ the call instruction and the call is deleted. The caller must + /// _after_ the call instruction and the call is deleted. The caller must /// be capable of handling this kind of change. - /// void LowerIntrinsicCall(CallInst *CI); - /// LowerToByteSwap - Replace a call instruction into a call to bswap - /// intrinsic. Return false if it has determined the call is not a - /// simple integer bswap. + /// Try to replace a call instruction with a call to a bswap intrinsic. Return + /// false if the call is not a simple integer bswap. static bool LowerToByteSwap(CallInst *CI); }; } diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index f347f66e0981c..988e6d6cb3a3c 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -22,7 +22,7 @@ namespace llvm { class LatencyPriorityQueue; /// Sorting functions for the Available queue. 
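For reference, the usual call pattern for the IntrinsicLowering interface above, roughly what an execution engine does when it meets an intrinsic it cannot handle natively. As documented, LowerIntrinsicCall aborts for intrinsics that must be implemented by the code generator, so the caller is expected to filter those out; expandIntrinsicCall is an illustrative name.

#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void expandIntrinsicCall(CallInst *CI) {
  Module *M = CI->getModule();
  IntrinsicLowering IL(M->getDataLayout());
  IL.AddPrototypes(*M);      // make sure any helper prototypes exist
  IL.LowerIntrinsicCall(CI); // inserts replacement code and deletes CI
}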
- struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct latency_sort { LatencyPriorityQueue *PQ; explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervals.h index 820e883624837..1150f3c1c47b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -1,4 +1,4 @@ -//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// +//===- LiveIntervals.h - Live Interval Analysis -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H -#define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#ifndef LLVM_CODEGEN_LIVEINTERVALS_H +#define LLVM_CODEGEN_LIVEINTERVALS_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" @@ -28,11 +28,11 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <utility> @@ -107,6 +107,11 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineInstr &Instr); + /// Calculate the spill weight to assign to a single instruction. + static float getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB); + LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) return *VirtRegIntervals[Reg]; @@ -473,4 +478,4 @@ class VirtRegMap; } // end namespace llvm -#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#endif diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index f9c741dd75b2d..f9aab0d09e1f7 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -20,11 +20,11 @@ /// register. /// /// X86 Example: -/// %YMM0<def> = ... -/// %XMM0<def> = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +/// %ymm0 = ... +/// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0) /// -/// %YMM0<def> = ... -/// %XMM0<def> = ..., %YMM0<imp-use> (%YMM0 and all its sub-registers are alive) +/// %ymm0 = ... +/// %xmm0 = ..., implicit %ymm0 (%ymm0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H @@ -32,8 +32,8 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <utility> @@ -108,6 +108,12 @@ public: /// Returns true if register \p Reg and no aliasing register is in the set. bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; + /// Remove defined registers and regmask kills from the set. + void removeDefs(const MachineInstr &MI); + + /// Add uses to the set. + void addUses(const MachineInstr &MI); + /// Simulates liveness when stepping backwards over an instruction(bundle). /// Remove Defs, add uses. This is the recommended way of calculating /// liveness. 
@@ -152,6 +158,10 @@ private: /// \brief Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); + + /// Adds pristine registers. Pristine registers are callee saved registers + /// that are unused in the function. + void addPristines(const MachineFunction &MF); }; inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { @@ -159,12 +169,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// \brief Computes the live-in list for \p MBB assuming all of its successors -/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p -/// LiveRegs; This is just here to avoid repeated heap allocations when calling -/// this multiple times in a pass. -void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, - MachineBasicBlock &MBB); +/// \brief Computes registers live-in to \p MBB assuming all of its successors +/// live-in lists are up-to-date. Puts the result into the given LivePhysReg +/// instance \p LiveRegs. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB); + +/// Recomputes dead and kill flags in \p MBB. +void recomputeLivenessFlags(MachineBasicBlock &MBB); + +/// Adds registers contained in \p LiveRegs to the block live-in list of \p MBB. +/// Does not add reserved registers. +void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); + +/// Convenience function combining computeLiveIns() and addLiveIns(). +void computeAndAddLiveIns(LivePhysRegs &LiveRegs, + MachineBasicBlock &MBB); } // end namespace llvm diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 362d9854a271a..84bccde0caa23 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include <cassert> namespace llvm { diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index c28b1a06854fc..dc4956da9637c 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,9 +16,9 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cstdint> namespace llvm { @@ -51,7 +51,7 @@ public: void clear() { Units.reset(); } /// Returns true if the set is empty. - bool empty() const { return Units.empty(); } + bool empty() const { return Units.none(); } /// Adds register units covered by physical register \p Reg. void addReg(unsigned Reg) { @@ -123,6 +123,11 @@ public: const BitVector &getBitVector() const { return Units; } + +private: + /// Adds pristine registers. Pristine registers are callee saved registers + /// that are unused in the function. 
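A minimal sketch of the intended use of computeAndAddLiveIns after a block has been created or split; fixupNewBlockLiveIns is an illustrative name, and the sketch assumes the block already has its final successor list.

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

using namespace llvm;

static void fixupNewBlockLiveIns(MachineBasicBlock &NewMBB) {
  const MachineFunction &MF = *NewMBB.getParent();
  LivePhysRegs LiveRegs(*MF.getSubtarget().getRegisterInfo());
  // Derives liveness from the successors' live-in lists and records the
  // result as live-in registers of NewMBB (reserved registers are skipped).
  computeAndAddLiveIns(LiveRegs, NewMBB);
}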
+ void addPristines(const MachineFunction &MF); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index d6e947c03dbdc..ed8da8662106d 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -36,7 +36,7 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" namespace llvm { diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 1b1ba6a05837c..ba40e522e261f 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -12,12 +12,18 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H -#define LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H +#ifndef LLVM_CODEGEN_MIRYAMLMAPPING_H +#define LLVM_CODEGEN_MIRYAMLMAPPING_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <string> #include <vector> namespace llvm { @@ -29,7 +35,7 @@ struct StringValue { std::string Value; SMRange SourceRange; - StringValue() {} + StringValue() = default; StringValue(std::string Value) : Value(std::move(Value)) {} bool operator==(const StringValue &Other) const { @@ -38,7 +44,7 @@ struct StringValue { }; template <> struct ScalarTraits<StringValue> { - static void output(const StringValue &S, void *, llvm::raw_ostream &OS) { + static void output(const StringValue &S, void *, raw_ostream &OS) { OS << S.Value; } @@ -50,16 +56,16 @@ template <> struct ScalarTraits<StringValue> { return ""; } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct FlowStringValue : StringValue { - FlowStringValue() {} + FlowStringValue() = default; FlowStringValue(std::string Value) : StringValue(std::move(Value)) {} }; template <> struct ScalarTraits<FlowStringValue> { - static void output(const FlowStringValue &S, void *, llvm::raw_ostream &OS) { + static void output(const FlowStringValue &S, void *, raw_ostream &OS) { return ScalarTraits<StringValue>::output(S, nullptr, OS); } @@ -67,11 +73,12 @@ template <> struct ScalarTraits<FlowStringValue> { return ScalarTraits<StringValue>::input(Scalar, Ctx, S); } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct BlockStringValue { StringValue Value; + bool operator==(const BlockStringValue &Other) const { return Value == Other.Value; } @@ -90,10 +97,10 @@ template <> struct BlockScalarTraits<BlockStringValue> { /// A wrapper around unsigned which contains a source range that's being set /// during parsing. 
struct UnsignedValue { - unsigned Value; + unsigned Value = 0; SMRange SourceRange; - UnsignedValue() : Value(0) {} + UnsignedValue() = default; UnsignedValue(unsigned Value) : Value(Value) {} bool operator==(const UnsignedValue &Other) const { @@ -113,7 +120,7 @@ template <> struct ScalarTraits<UnsignedValue> { return ScalarTraits<unsigned>::input(Scalar, Ctx, Value.Value); } - static bool mustQuote(StringRef Scalar) { + static QuotingType mustQuote(StringRef Scalar) { return ScalarTraits<unsigned>::mustQuote(Scalar); } }; @@ -148,7 +155,9 @@ struct VirtualRegisterDefinition { UnsignedValue ID; StringValue Class; StringValue PreferredRegister; + // TODO: Serialize the target specific register hints. + bool operator==(const VirtualRegisterDefinition &Other) const { return ID == Other.ID && Class == Other.Class && PreferredRegister == Other.PreferredRegister; @@ -169,6 +178,7 @@ template <> struct MappingTraits<VirtualRegisterDefinition> { struct MachineFunctionLiveIn { StringValue Register; StringValue VirtualRegister; + bool operator==(const MachineFunctionLiveIn &Other) const { return Register == Other.Register && VirtualRegister == Other.VirtualRegister; @@ -202,16 +212,21 @@ struct MachineStackObject { int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + uint8_t StackID = 0; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; Optional<int64_t> LocalOffset; StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; + bool operator==(const MachineStackObject &Other) const { return ID == Other.ID && Name == Other.Name && Type == Other.Type && Offset == Other.Offset && Size == Other.Size && Alignment == Other.Alignment && + StackID == Other.StackID && CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored && LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; } @@ -237,8 +252,11 @@ template <> struct MappingTraits<MachineStackObject> { if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("stack-id", Object.StackID); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. 
@@ -260,14 +278,19 @@ struct FixedMachineStackObject { int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + uint8_t StackID = 0; bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; + bool operator==(const FixedMachineStackObject &Other) const { return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && Size == Other.Size && Alignment == Other.Alignment && + StackID == Other.StackID && IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && - CalleeSavedRegister == Other.CalleeSavedRegister; + CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored; } }; @@ -289,12 +312,15 @@ template <> struct MappingTraits<FixedMachineStackObject> { YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); YamlIO.mapOptional("size", Object.Size, (uint64_t)0); YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("stack-id", Object.StackID); if (Object.Type != FixedMachineStackObject::SpillSlot) { YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); YamlIO.mapOptional("isAliased", Object.IsAliased, false); } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); } static const bool flow = true; @@ -304,9 +330,12 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; unsigned Alignment = 0; + bool IsTargetSpecific = false; + bool operator==(const MachineConstantPoolValue &Other) const { return ID == Other.ID && Value == Other.Value && - Alignment == Other.Alignment; + Alignment == Other.Alignment && + IsTargetSpecific == Other.IsTargetSpecific; } }; @@ -315,6 +344,7 @@ template <> struct MappingTraits<MachineConstantPoolValue> { YamlIO.mapRequired("id", Constant.ID); YamlIO.mapOptional("value", Constant.Value, StringValue()); YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); + YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false); } }; @@ -322,6 +352,7 @@ struct MachineJumpTable { struct Entry { UnsignedValue ID; std::vector<FlowStringValue> Blocks; + bool operator==(const Entry &Other) const { return ID == Other.ID && Blocks == Other.Blocks; } @@ -329,6 +360,7 @@ struct MachineJumpTable { MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; std::vector<Entry> Entries; + bool operator==(const MachineJumpTable &Other) const { return Kind == Other.Kind && Entries == Other.Entries; } @@ -387,6 +419,7 @@ struct MachineFrameInfo { bool HasMustTailInVarArgFunc = false; StringValue SavePoint; StringValue RestorePoint; + bool operator==(const MachineFrameInfo &Other) const { return IsFrameAddressTaken == Other.IsFrameAddressTaken && IsReturnAddressTaken == Other.IsReturnAddressTaken && @@ -485,4 +518,4 @@ template <> struct MappingTraits<MachineFunction> { } // end namespace yaml } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MIRYAMLMAPPING_H diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 97a49ce4dc4fa..0c9110cbaa876 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -25,6 +25,7 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Printable.h" #include <cassert> #include <cstdint> #include 
<functional> @@ -97,6 +98,8 @@ private: using const_probability_iterator = std::vector<BranchProbability>::const_iterator; + Optional<uint64_t> IrrLoopHeaderWeight; + /// Keep track of the physical registers that are livein of the basicblock. using LiveInVector = std::vector<RegisterMaskPair>; LiveInVector LiveIns; @@ -699,8 +702,8 @@ public: LQR_Unknown ///< Register liveness not decidable from local neighborhood. }; - /// Return whether (physical) register \p Reg has been <def>ined and not - /// <kill>ed as of just before \p Before. + /// Return whether (physical) register \p Reg has been defined and not + /// killed as of just before \p Before. /// /// Search is localised to a neighborhood of \p Neighborhood instructions /// before (searching for defs or kills) and \p Neighborhood instructions @@ -729,6 +732,14 @@ public: /// Return the MCSymbol for this basic block. MCSymbol *getSymbol() const; + Optional<uint64_t> getIrrLoopHeaderWeight() const { + return IrrLoopHeaderWeight; + } + + void setIrrLoopHeaderWeight(uint64_t Weight) { + IrrLoopHeaderWeight = Weight; + } + private: /// Return probability iterator corresponding to the I successor iterator. probability_iterator getProbabilityIterator(succ_iterator I); @@ -748,7 +759,7 @@ private: // Machine-CFG mutators - /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this + /// Add Pred as a predecessor of this MachineBasicBlock. Don't do this /// unless you know what you're doing, because it doesn't update Pred's /// successors list. Use Pred->addSuccessor instead. void addPredecessor(MachineBasicBlock *Pred); @@ -761,9 +772,17 @@ private: raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); +/// Prints a machine basic block reference. +/// +/// The format is: +/// %bb.5 - a machine basic block with MBB.getNumber() == 5. +/// +/// Usage: OS << printMBBReference(MBB) << '\n'; +Printable printMBBReference(const MachineBasicBlock &MBB); + // This is useful when building IndexedMaps keyed on basic block pointers. 
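A tiny usage sketch for the new printMBBReference helper; dumpSuccessors is an illustrative name.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpSuccessors(const MachineBasicBlock &MBB) {
  // Prints lines such as "%bb.3 -> %bb.5" based on MBB.getNumber().
  for (const MachineBasicBlock *Succ : MBB.successors())
    errs() << printMBBReference(MBB) << " -> " << printMBBReference(*Succ)
           << '\n';
}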
-struct MBB2NumberFunctor : - public std::unary_function<const MachineBasicBlock*, unsigned> { +struct MBB2NumberFunctor { + using argument_type = const MachineBasicBlock *; unsigned operator()(const MachineBasicBlock *MBB) const { return MBB->getNumber(); } diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index cba79c818a761..5b4b99ca0a5d8 100644 --- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -62,6 +62,8 @@ public: Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const; + bool isIrrLoopHeader(const MachineBasicBlock *MBB); + const MachineFunction *getFunction() const; const MachineBranchProbabilityInfo *getMBPI() const; void view(const Twine &Name, bool isSimple = true) const; diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 8c54ae9254708..586535f771c28 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -68,12 +68,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, - FMLSv2i32_indexed_OP2, - FMLSv2i64_indexed_OP2, + FMLSv2f32_OP1, FMLSv2f32_OP2, + FMLSv2f64_OP1, FMLSv2f64_OP2, - FMLSv4i32_indexed_OP2, - FMLSv4f32_OP2 + FMLSv2i32_indexed_OP1, + FMLSv2i32_indexed_OP2, + FMLSv2i64_indexed_OP1, + FMLSv2i64_indexed_OP2, + FMLSv4f32_OP1, + FMLSv4f32_OP2, + FMLSv4i32_indexed_OP1, + FMLSv4i32_indexed_OP2 }; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h index 6efeefd9a7217..ffbcc62bfa36b 100644 --- a/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -39,7 +39,7 @@ public: DominanceFrontierBase<MachineBasicBlock, false> &getBase() { return Base; } - inline const std::vector<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return Base.getRoots(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 8bf98f6064956..98fdb51aae2fd 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -93,7 +93,7 @@ public: /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). /// - inline const std::vector<MachineBasicBlock*> &getRoots() const { + inline const SmallVectorImpl<MachineBasicBlock*> &getRoots() const { applySplitCriticalEdges(); return DT->getRoots(); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 689f3cd9fd12b..f887517217e18 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -31,15 +31,30 @@ class AllocaInst; class CalleeSavedInfo { unsigned Reg; int FrameIdx; + /// Flag indicating whether the register is actually restored in the epilog. + /// In most cases, if a register is saved, it is also restored. There are + /// some situations, though, when this is not the case. For example, the + /// LR register on ARM is usually saved, but on exit from the function its + /// saved value may be loaded directly into PC. 
Since liveness tracking of + /// physical registers treats callee-saved registers are live outside of + /// the function, LR would be treated as live-on-exit, even though in these + /// scenarios it is not. This flag is added to indicate that the saved + /// register described by this object is not restored in the epilog. + /// The long-term solution is to model the liveness of callee-saved registers + /// by implicit uses on the return instructions, however, the required + /// changes in the ARM backend would be quite extensive. + bool Restored; public: explicit CalleeSavedInfo(unsigned R, int FI = 0) - : Reg(R), FrameIdx(FI) {} + : Reg(R), FrameIdx(FI), Restored(true) {} // Accessors. unsigned getReg() const { return Reg; } int getFrameIdx() const { return FrameIdx; } void setFrameIdx(int FI) { FrameIdx = FI; } + bool isRestored() const { return Restored; } + void setRestored(bool R) { Restored = R; } }; /// The MachineFrameInfo class represents an abstract stack frame until @@ -99,8 +114,16 @@ class MachineFrameInfo { /// and/or GC related) over a statepoint. We know that the address of the /// slot can't alias any LLVM IR value. This is very similar to a Spill /// Slot, but is created by statepoint lowering is SelectionDAG, not the - /// register allocator. - bool isStatepointSpillSlot; + /// register allocator. + bool isStatepointSpillSlot = false; + + /// Identifier for stack memory type analagous to address space. If this is + /// non-0, the meaning is target defined. Offsets cannot be directly + /// compared between objects with different stack IDs. The object may not + /// necessarily reside in the same contiguous memory block as other stack + /// objects. Objects with differing stack IDs should not be merged or + /// replaced substituted for each other. + uint8_t StackID; /// If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL. @@ -108,7 +131,7 @@ class MachineFrameInfo { // If true, the object was mapped into the local frame // block and doesn't need additional handling for allocation beyond that. - bool PreAllocated; + bool PreAllocated = false; // If true, an LLVM IR value might point to this object. // Normally, spill slots and fixed-offset objects don't alias IR-accessible @@ -117,16 +140,17 @@ class MachineFrameInfo { bool isAliased; /// If true, the object has been zero-extended. - bool isZExt; + bool isZExt = false; /// If true, the object has been zero-extended. - bool isSExt; - - StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, const AllocaInst *Val, bool A) - : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val), - PreAllocated(false), isAliased(A), isZExt(false), isSExt(false) {} + bool isSExt = false; + + StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, + bool IsAliased, uint8_t StackID = 0) + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), + StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} }; /// The alignment of the stack. @@ -549,13 +573,13 @@ public: /// All fixed objects should be created before other objects are created for /// efficiency. By default, fixed objects are not pointed to by LLVM IR /// values. This returns an index with a negative value. 
- int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, + int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased = false); /// Create a spill slot at a fixed location on the stack. /// Returns an index with a negative value. int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, - bool Immutable = false); + bool IsImmutable = false); /// Returns true if the specified index corresponds to a fixed stack object. bool isFixedObjectIndex(int ObjectIdx) const { @@ -581,10 +605,10 @@ public: } /// Marks the immutability of an object. - void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) { + void setIsImmutableObjectIndex(int ObjectIdx, bool IsImmutable) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable; + Objects[ObjectIdx+NumFixedObjects].isImmutable = IsImmutable; } /// Returns true if the specified index corresponds to a spill slot. @@ -600,6 +624,18 @@ public: return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot; } + /// \see StackID + uint8_t getStackID(int ObjectIdx) const { + return Objects[ObjectIdx+NumFixedObjects].StackID; + } + + /// \see StackID + void setStackID(int ObjectIdx, uint8_t ID) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].StackID = ID; + } + /// Returns true if the specified index corresponds to a dead object. bool isDeadObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && @@ -624,8 +660,8 @@ public: /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. - int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - const AllocaInst *Alloca = nullptr); + int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, + const AllocaInst *Alloca = nullptr, uint8_t ID = 0); /// Create a new statically sized stack object that represents a spill slot, /// returning a nonnegative identifier to represent it. @@ -646,6 +682,8 @@ public: const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const { return CSInfo; } + /// \copydoc getCalleeSavedInfo() + std::vector<CalleeSavedInfo> &getCalleeSavedInfo() { return CSInfo; } /// Used by prolog/epilog inserter to set the function's callee saved /// information. diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 010d7032c516a..7d8b7ebe8d629 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -223,7 +223,7 @@ struct LandingPadInfo { }; class MachineFunction { - const Function *Fn; + const Function &F; const TargetMachine &Target; const TargetSubtargetInfo *STI; MCContext &Ctx; @@ -314,6 +314,9 @@ class MachineFunction { /// Map of invoke call site index values to associated begin EH_LABEL. DenseMap<MCSymbol*, unsigned> CallSiteMap; + /// CodeView label annotations. 
+ std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations; + bool CallsEHReturn = false; bool CallsUnwindInit = false; bool HasEHFunclets = false; @@ -356,8 +359,9 @@ public: using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function *Fn, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &MMI); + MachineFunction(const Function &F, const TargetMachine &TM, + const TargetSubtargetInfo &STI, unsigned FunctionNum, + MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); @@ -376,8 +380,8 @@ public: /// Return the DataLayout attached to the Module associated to this MF. const DataLayout &getDataLayout() const; - /// getFunction - Return the LLVM function that this machine code represents - const Function *getFunction() const { return Fn; } + /// Return the LLVM function that this machine code represents + const Function &getFunction() const { return F; } /// getName - Return the name of the corresponding LLVM function. StringRef getName() const; @@ -625,14 +629,23 @@ public: MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp = false); - /// CloneMachineInstr - Create a new MachineInstr which is a copy of the - /// 'Orig' instruction, identical in all ways except the instruction - /// has no parent, prev, or next. + /// Create a new MachineInstr which is a copy of \p Orig, identical in all + /// ways except the instruction has no parent, prev, or next. Bundling flags + /// are reset. /// - /// See also TargetInstrInfo::duplicate() for target-specific fixes to cloned - /// instructions. + /// Note: Clones a single instruction, not whole instruction bundles. + /// Does not perform target specific adjustments; consider using + /// TargetInstrInfo::duplicate() instead. MachineInstr *CloneMachineInstr(const MachineInstr *Orig); + /// Clones instruction or the whole instruction bundle \p Orig and insert + /// into \p MBB before \p InsertBefore. + /// + /// Note: Does not perform target specific adjustments; consider using + /// TargetInstrInfo::duplicate() intead. + MachineInstr &CloneMachineInstrBundle(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig); + /// DeleteMachineInstr - Delete the given MachineInstr. void DeleteMachineInstr(MachineInstr *MI); @@ -823,6 +836,15 @@ public: return CallSiteMap.count(BeginLabel); } + /// Record annotations associated with a particular label. + void addCodeViewAnnotation(MCSymbol *Label, MDNode *MD) { + CodeViewAnnotations.push_back({Label, MD}); + } + + ArrayRef<std::pair<MCSymbol *, MDNode *>> getCodeViewAnnotations() const { + return CodeViewAnnotations; + } + /// Return a reference to the C++ typeinfo for the current function. 
const std::vector<const GlobalValue *> &getTypeInfos() const { return TypeInfos; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index b87aff102d478..3c1c1bb14f426 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -22,11 +22,11 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/ArrayRecycler.h" -#include "llvm/Target/TargetOpcodes.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -44,6 +44,7 @@ class MachineRegisterInfo; class ModuleSlotTracker; class raw_ostream; template <typename T> class SmallVectorImpl; +class SmallBitVector; class StringRef; class TargetInstrInfo; class TargetRegisterClass; @@ -67,7 +68,9 @@ public: /// otherwise easily derivable from the IR text. /// enum CommentFlag { - ReloadReuse = 0x1 // higher bits are reserved for target dep comments. + ReloadReuse = 0x1, // higher bits are reserved for target dep comments. + NoSchedComment = 0x2, + TAsmComments = 0x4 // Target Asm comments should start from this value. }; enum MIFlag { @@ -139,6 +142,17 @@ public: const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } + /// Return the function that contains the basic block that this instruction + /// belongs to. + /// + /// Note: this is undefined behaviour if the instruction does not have a + /// parent. + const MachineFunction *getMF() const; + MachineFunction *getMF() { + return const_cast<MachineFunction *>( + static_cast<const MachineInstr *>(this)->getMF()); + } + /// Return the asm printer flags bitvector. uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; } @@ -290,6 +304,21 @@ public: return Operands[i]; } + /// Return true if operand \p OpIdx is a subregister index. + bool isOperandSubregIdx(unsigned OpIdx) const { + assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && + "Expected MO_Immediate operand type."); + if (isExtractSubreg() && OpIdx == 2) + return true; + if (isInsertSubreg() && OpIdx == 3) + return true; + if (isRegSequence() && OpIdx > 1 && (OpIdx % 2) == 0) + return true; + if (isSubregToReg() && OpIdx == 3) + return true; + return false; + } + /// Returns the number of non-implicit operands. unsigned getNumExplicitOperands() const; @@ -771,9 +800,14 @@ public: bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; } bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; } + bool isAnnotationLabel() const { + return getOpcode() == TargetOpcode::ANNOTATION_LABEL; + } /// Returns true if the MachineInstr represents a label. 
- bool isLabel() const { return isEHLabel() || isGCLabel(); } + bool isLabel() const { + return isEHLabel() || isGCLabel() || isAnnotationLabel(); + } bool isCFIInstruction() const { return getOpcode() == TargetOpcode::CFI_INSTRUCTION; @@ -792,7 +826,10 @@ public: && getOperand(1).isImm(); } - bool isPHI() const { return getOpcode() == TargetOpcode::PHI; } + bool isPHI() const { + return getOpcode() == TargetOpcode::PHI || + getOpcode() == TargetOpcode::G_PHI; + } bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } @@ -869,6 +906,7 @@ public: return isMetaInstruction(); // Copy-like instructions are usually eliminated during register allocation. case TargetOpcode::PHI: + case TargetOpcode::G_PHI: case TargetOpcode::COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: @@ -1185,6 +1223,15 @@ public: /// Debugging support /// @{ + /// Determine the generic type to be printed (if needed) on uses and defs. + LLT getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) const; + + /// Return true when an instruction has tied register that can't be determined + /// by the instruction's descriptor. This is useful for MIR printing, to + /// determine whether we need to print the ties or not. + bool hasComplexRegisterTies() const; + /// Print this MI to \p OS. /// Only print the defs and the opcode if \p SkipOpers is true. /// Otherwise, also print operands if \p SkipDebugLoc is true. diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 412c55d542ea6..e4f3976ec9504 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/ErrorHandling.h" @@ -48,6 +49,7 @@ namespace RegState { EarlyClobber = 0x40, Debug = 0x80, InternalRead = 0x100, + Renamable = 0x200, DefineNoRead = Define | Undef, ImplicitDefine = Implicit | Define, ImplicitKill = Implicit | Kill @@ -91,7 +93,8 @@ public: flags & RegState::EarlyClobber, SubReg, flags & RegState::Debug, - flags & RegState::InternalRead)); + flags & RegState::InternalRead, + flags & RegState::Renamable)); return *this; } @@ -396,28 +399,32 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, } /// This version of the builder builds a DBG_VALUE intrinsic -/// for either a value in a register or a register-indirect+offset +/// for either a value in a register or a register-indirect /// address. The convention is that a DBG_VALUE is indirect iff the /// second operand is an immediate. MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr); + unsigned Reg, const MDNode *Variable, + const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic -/// for either a value in a register or a register-indirect+offset +/// for either a value in a register or a register-indirect /// address and inserts it at position I. 
MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr); + unsigned Reg, const MDNode *Variable, + const MDNode *Expr); /// Clone a DBG_VALUE whose value has been spilled to FrameIndex. MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const MachineInstr &Orig, int FrameIndex); +/// Update a DBG_VALUE whose value has been spilled to FrameIndex. Useful when +/// modifying an instruction in place while iterating over a basic block. +void updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex); + inline unsigned getDefRegState(bool B) { return B ? RegState::Define : 0; } @@ -439,6 +446,9 @@ inline unsigned getInternalReadRegState(bool B) { inline unsigned getDebugRegState(bool B) { return B ? RegState::Debug : 0; } +inline unsigned getRenamableRegState(bool B) { + return B ? RegState::Renamable : 0; +} /// Get all register state flags from machine operand \p RegOp. inline unsigned getRegState(const MachineOperand &RegOp) { @@ -449,7 +459,10 @@ inline unsigned getRegState(const MachineOperand &RegOp) { getDeadRegState(RegOp.isDead()) | getUndefRegState(RegOp.isUndef()) | getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()); + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState( + TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 995c7001d9282..b5341fd1ae496 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -150,7 +150,7 @@ public: /// struct VirtRegInfo { /// Reads - One of the operands read the virtual register. This does not - /// include <undef> or <internal> use operands, see MO::readsReg(). + /// include undef or internal use operands, see MO::readsReg(). bool Reads; /// Writes - One of the operands writes the virtual register. diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index adcd1d0de63d3..25a3e6b556a3a 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H +#include "llvm/Support/Printable.h" #include <cassert> #include <vector> @@ -125,6 +126,15 @@ public: void dump() const; }; + +/// Prints a jump table entry reference. +/// +/// The format is: +/// %jump-table.5 - a jump table entry with index == 5. +/// +/// Usage: OS << printJumpTableEntryReference(Idx) << '\n'; +Printable printJumpTableEntryReference(unsigned Idx); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 58cffaade9d2a..104655e455246 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -44,8 +44,6 @@ extern template class LoopBase<MachineBasicBlock, MachineLoop>; class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> { public: - MachineLoop(); - /// Return the "top" block in the loop, which is the first block in the linear /// layout, ignoring any parts of the loop not contiguous with the part that /// contains the header. 
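The RegState changes in the MachineInstrBuilder.h hunks above add a Renamable flag and fold it into getRegState(), so a pass that rebuilds an instruction can carry an existing operand's full register state across. The helper below is a minimal sketch of that pattern against the post-patch headers; buildCopyLike and its parameters are illustrative names, not part of the diff.

#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Rebuild a single-def instruction, copying the source operand together with
// all of its register state. getRegState() now also returns the Renamable bit
// for physical registers, so the rebuilt use keeps the original operand's
// renaming freedom.
static MachineInstr *buildCopyLike(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, const MCInstrDesc &Desc,
                                   unsigned DstReg,
                                   const MachineOperand &SrcMO) {
  return BuildMI(MBB, I, DL, Desc, DstReg)
      .addReg(SrcMO.getReg(), getRegState(SrcMO), SrcMO.getSubReg());
}

The DBG_VALUE overloads above drop their Offset parameter for a related reason: the offset now travels inside the DIExpression operand rather than as a separate immediate.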
@@ -76,6 +74,8 @@ private: explicit MachineLoop(MachineBasicBlock *MBB) : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {} + + MachineLoop() = default; }; // Implementation in LoopInfoImpl.h diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a9de0db05d72c..c5b204a79f040 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -45,18 +45,46 @@ struct MachinePointerInfo { /// Offset - This is an offset from the base Value*. int64_t Offset; - explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0) - : V(v), Offset(offset) {} + uint8_t StackID; - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0) - : V(v), Offset(offset) {} + unsigned AddrSpace = 0; + + explicit MachinePointerInfo(const Value *v, int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0; + } + + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getAddressSpace() : 0; + } + + explicit MachinePointerInfo(unsigned AddressSpace = 0) + : V((const Value *)nullptr), Offset(0), StackID(0), + AddrSpace(AddressSpace) {} + + explicit MachinePointerInfo( + PointerUnion<const Value *, const PseudoSourceValue *> v, + int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + if (V) { + if (const auto *ValPtr = V.dyn_cast<const Value*>()) + AddrSpace = ValPtr->getType()->getPointerAddressSpace(); + else + AddrSpace = V.get<const PseudoSourceValue*>()->getAddressSpace(); + } + } MachinePointerInfo getWithOffset(int64_t O) const { - if (V.isNull()) return MachinePointerInfo(); + if (V.isNull()) + return MachinePointerInfo(AddrSpace); if (V.is<const Value*>()) - return MachinePointerInfo(V.get<const Value*>(), Offset+O); - return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O); + return MachinePointerInfo(V.get<const Value*>(), Offset+O, StackID); + return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O, + StackID); } /// Return true if memory region [V, V+Offset+Size) is known to be @@ -82,7 +110,11 @@ struct MachinePointerInfo { static MachinePointerInfo getGOT(MachineFunction &MF); /// Stack pointer relative access. - static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset); + static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, + uint8_t ID = 0); + + /// Stack memory without other information. + static MachinePointerInfo getUnknownStack(MachineFunction &MF); }; diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index d64941a9e725a..6be304fa368bb 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -125,6 +125,16 @@ class MachineModuleInfo : public ImmutablePass { /// comments in lib/Target/X86/X86FrameLowering.cpp for more details. bool UsesMorestackAddr; + /// True if the module contains split-stack functions. This is used to + /// emit .note.GNU-split-stack section as required by the linker for + /// special handling split-stack function calling no-split-stack function. + bool HasSplitStack; + + /// True if the module contains no-split-stack functions. This is used to + /// emit .note.GNU-no-split-stack section when it also contains split-stack + /// functions. 
+ bool HasNosplitStack; + /// Maps IR Functions to their corresponding MachineFunctions. DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -145,7 +155,6 @@ public: const MCContext &getContext() const { return Context; } MCContext &getContext() { return Context; } - void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } /// Returns the MachineFunction constructed for the IR function \p F. @@ -194,6 +203,22 @@ public: UsesMorestackAddr = b; } + bool hasSplitStack() const { + return HasSplitStack; + } + + void setHasSplitStack(bool b) { + HasSplitStack = b; + } + + bool hasNosplitStack() const { + return HasNosplitStack; + } + + void setHasNosplitStack(bool b) { + HasNosplitStack = b; + } + /// Return the symbol to be used for the specified basic block when its /// address is taken. This cannot be its normal LBB label because the block /// may be accessed outside its containing function. diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index 34b21ceddd434..6a87fa2fbf009 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineModuleInfoImpls.h --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,11 +15,12 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/ValueTypes.h" +#include <cassert> namespace llvm { + class MCSymbol; /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation @@ -36,6 +37,7 @@ class MachineModuleInfoMachO : public MachineModuleInfoImpl { DenseMap<MCSymbol *, StubValueTy> ThreadLocalGVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoMachO(const MachineModuleInfo &) {} @@ -64,6 +66,7 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { DenseMap<MCSymbol *, StubValueTy> GVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoELF(const MachineModuleInfo &) {} @@ -79,4 +82,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 2560399bcf545..ccf0917ed0851 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -14,8 +14,10 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include <cassert> namespace llvm { @@ -65,6 +67,7 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel + MO_Last = MO_Predicate, }; private: @@ -83,24 +86,30 @@ private: /// before MachineInstr::tieOperands(). unsigned char TiedTo : 4; - /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register - /// operands. - /// IsDef - True if this is a def, false if this is a use of the register. + /// This is only valid on register operands. 
/// bool IsDef : 1; /// IsImp - True if this is an implicit def or use, false if it is explicit. + /// This is only valid on register opderands. /// bool IsImp : 1; - /// IsKill - True if this instruction is the last use of the register on this - /// path through the function. This is only valid on uses of registers. - bool IsKill : 1; - - /// IsDead - True if this register is never used by a subsequent instruction. - /// This is only valid on definitions of registers. - bool IsDead : 1; + /// IsDeadOrKill + /// For uses: IsKill - True if this instruction is the last use of the + /// register on this path through the function. + /// For defs: IsDead - True if this register is never used by a subsequent + /// instruction. + /// This is only valid on register operands. + bool IsDeadOrKill : 1; + + /// IsRenamable - True if this register may be renamed, i.e. it does not + /// generate a value that is somehow read in a way that is not represented by + /// the Machine IR (e.g. to meet an ABI or ISA requirement). This is only + /// valid on physical register operands. Virtual registers are assumed to + /// always be renamable regardless of the value of this field. + bool IsRenamable : 1; /// IsUndef - True if this register operand reads an "undef" value, i.e. the /// read value doesn't matter. This flag can be set on both use and def @@ -114,9 +123,9 @@ private: /// the same register. In that case, the instruction may depend on those /// operands reading the same dont-care value. For example: /// - /// %vreg1<def> = XOR %vreg2<undef>, %vreg2<undef> + /// %1 = XOR undef %2, undef %2 /// - /// Any register can be used for %vreg2, and its value doesn't matter, but + /// Any register can be used for %2, and its value doesn't matter, but /// the two operands must be the same register. /// bool IsUndef : 1; @@ -224,11 +233,50 @@ public: /// void clearParent() { ParentMI = nullptr; } + /// Print a subreg index operand. + /// MO_Immediate operands can also be subreg idices. If it's the case, the + /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be + /// called to check this. + static void printSubregIdx(raw_ostream &OS, uint64_t Index, + const TargetRegisterInfo *TRI); + + /// Print operand target flags. + static void printTargetFlags(raw_ostream& OS, const MachineOperand &Op); + + /// Print a MCSymbol as an operand. + static void printSymbol(raw_ostream &OS, MCSymbol &Sym); + + /// Print a stack object reference. + static void printStackObjectReference(raw_ostream &OS, unsigned FrameIndex, + bool IsFixed, StringRef Name); + + /// Print the MachineOperand to \p os. + /// Providing a valid \p TRI and \p IntrinsicInfo results in a more + /// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the + /// function will try to pick it up from the parent. void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr, const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; - void print(raw_ostream &os, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI = nullptr, - const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; + + /// More complex way of printing a MachineOperand. + /// \param TypeToPrint specifies the generic type to be printed on uses and + /// defs. It can be determined using MachineInstr::getTypeToPrint. + /// \param PrintDef - whether we want to print `def` on an operand which + /// isDef. Sometimes, if the operand is printed before '=', we don't print + /// `def`. 
+ /// \param ShouldPrintRegisterTies - whether we want to print register ties. + /// Sometimes they are easily determined by the instruction's descriptor + /// (MachineInstr::hasComplexRegiterTies can determine if it's needed). + /// \param TiedOperandIdx - if we need to print register ties this needs to + /// provide the index of the tied register. If not, it will be ignored. + /// \param TRI - provide more target-specific information to the printer. + /// Unlike the previous function, this one will not try and get the + /// information from it's parent. + /// \param IntrinsicInfo - same as \p TRI. + void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, + bool PrintDef, bool ShouldPrintRegisterTies, + unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const; + void dump() const; //===--------------------------------------------------------------------===// @@ -301,12 +349,12 @@ public: bool isDead() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsDead; + return IsDeadOrKill & IsDef; } bool isKill() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsKill; + return IsDeadOrKill & !IsDef; } bool isUndef() const { @@ -314,6 +362,8 @@ public: return IsUndef; } + bool isRenamable() const; + bool isInternalRead() const { assert(isReg() && "Wrong MachineOperand accessor"); return IsInternalRead; @@ -369,12 +419,13 @@ public: /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, - /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL. + /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// void substPhysReg(unsigned Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } + /// Change a def to a use, or a use to a def. void setIsDef(bool Val = true); void setImplicit(bool Val = true) { @@ -385,12 +436,12 @@ public: void setIsKill(bool Val = true) { assert(isReg() && !IsDef && "Wrong MachineOperand mutator"); assert((!Val || !isDebug()) && "Marking a debug operation as kill"); - IsKill = Val; + IsDeadOrKill = Val; } void setIsDead(bool Val = true) { assert(isReg() && IsDef && "Wrong MachineOperand mutator"); - IsDead = Val; + IsDeadOrKill = Val; } void setIsUndef(bool Val = true) { @@ -398,6 +449,12 @@ public: IsUndef = Val; } + void setIsRenamable(bool Val = true); + + /// Set IsRenamable to true if there are no extra register allocation + /// requirements placed on this operand by the parent instruction's opcode. + void setIsRenamableIfNoExtraRegAllocReq(); + void setIsInternalRead(bool Val = true) { assert(isReg() && "Wrong MachineOperand mutator"); IsInternalRead = Val; @@ -549,6 +606,11 @@ public: Contents.OffsetedInfo.Val.Index = Idx; } + void setMetadata(const MDNode *MD) { + assert(isMetadata() && "Wrong MachineOperand mutator"); + Contents.MD = MD; + } + void setMBB(MachineBasicBlock *MBB) { assert(isMBB() && "Wrong MachineOperand mutator"); Contents.MBB = MBB; @@ -568,14 +630,16 @@ public: //===--------------------------------------------------------------------===// /// Returns true if this operand is identical to the specified operand except - /// for liveness related flags (isKill, isUndef and isDead). + /// for liveness related flags (isKill, isUndef and isDead). Note that this + /// should stay in sync with the hash_value overload below. bool isIdenticalTo(const MachineOperand &Other) const; /// \brief MachineOperand hash_value overload. 
/// /// Note that this includes the same information in the hash that /// isIdenticalTo uses for comparison. It is thus suited for use in hash - /// tables which use that function for equality comparisons only. + /// tables which use that function for equality comparisons only. This must + /// stay exactly in sync with isIdenticalTo above. friend hash_code hash_value(const MachineOperand &MO); /// ChangeToImmediate - Replace this operand with a new immediate operand of @@ -597,6 +661,10 @@ public: /// Replace this operand with a frame index. void ChangeToFrameIndex(int Idx); + /// Replace this operand with a target index. + void ChangeToTargetIndex(unsigned Idx, int64_t Offset, + unsigned char TargetFlags = 0); + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. @@ -630,16 +698,16 @@ public: bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, - unsigned SubReg = 0, - bool isDebug = false, - bool isInternalRead = false) { + unsigned SubReg = 0, bool isDebug = false, + bool isInternalRead = false, + bool isRenamable = false) { assert(!(isDead && !isDef) && "Dead flag on non-def"); assert(!(isKill && isDef) && "Kill flag on def"); MachineOperand Op(MachineOperand::MO_Register); Op.IsDef = isDef; Op.IsImp = isImp; - Op.IsKill = isKill; - Op.IsDead = isDead; + Op.IsDeadOrKill = isKill | isDead; + Op.IsRenamable = isRenamable; Op.IsUndef = isUndef; Op.IsInternalRead = isInternalRead; Op.IsEarlyClobber = isEarlyClobber; @@ -679,8 +747,7 @@ public: Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, - unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); @@ -711,12 +778,12 @@ public: return Op; } /// CreateRegMask - Creates a register mask operand referencing Mask. The - /// operand does not take ownership of the memory referenced by Mask, it must - /// remain valid for the lifetime of the operand. + /// operand does not take ownership of the memory referenced by Mask, it + /// must remain valid for the lifetime of the operand. /// - /// A RegMask operand represents a set of non-clobbered physical registers on - /// an instruction that clobbers many registers, typically a call. The bit - /// mask has a bit set for each physreg that is preserved by this + /// A RegMask operand represents a set of non-clobbered physical registers + /// on an instruction that clobbers many registers, typically a call. The + /// bit mask has a bit set for each physreg that is preserved by this /// instruction, as described in the documentation for /// TargetRegisterInfo::getCallPreservedMask(). /// @@ -769,30 +836,63 @@ public: friend class MachineInstr; friend class MachineRegisterInfo; + private: + // If this operand is currently a register operand, and if this is in a + // function, deregister the operand from the register's use/def list. void removeRegFromUses(); + /// Artificial kinds for DenseMap usage. + enum : unsigned char { + MO_Empty = MO_Last + 1, + MO_Tombstone, + }; + + friend struct DenseMapInfo<MachineOperand>; + //===--------------------------------------------------------------------===// // Methods for handling register use/def lists. 
//===--------------------------------------------------------------------===// - /// isOnRegUseList - Return true if this operand is on a register use/def list - /// or false if not. This can only be called for register operands that are - /// part of a machine instruction. + /// isOnRegUseList - Return true if this operand is on a register use/def + /// list or false if not. This can only be called for register operands + /// that are part of a machine instruction. bool isOnRegUseList() const { assert(isReg() && "Can only add reg operand to use lists"); return Contents.Reg.Prev != nullptr; } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) { - MO.print(OS, nullptr); +template <> struct DenseMapInfo<MachineOperand> { + static MachineOperand getEmptyKey() { + return MachineOperand(static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Empty)); + } + static MachineOperand getTombstoneKey() { + return MachineOperand(static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Tombstone)); + } + static unsigned getHashValue(const MachineOperand &MO) { + return hash_value(MO); + } + static bool isEqual(const MachineOperand &LHS, const MachineOperand &RHS) { + if (LHS.getType() == static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Empty) || + LHS.getType() == static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Tombstone)) + return LHS.getType() == RHS.getType(); + return LHS.isIdenticalTo(RHS); + } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand &MO) { + MO.print(OS); return OS; } - // See friend declaration above. This additional declaration is required in - // order to compile LLVM with IBM xlC compiler. - hash_code hash_value(const MachineOperand &MO); -} // End llvm namespace +// See friend declaration above. This additional declaration is required in +// order to compile LLVM with IBM xlC compiler. +hash_code hash_value(const MachineOperand &MO); +} // namespace llvm #endif diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 6ad5de533d13d..2fdefbed37ce0 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -16,7 +16,7 @@ #ifndef LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H #define LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -33,7 +33,7 @@ public: const DiagnosticLocation &Loc, const MachineBasicBlock *MBB) : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, RemarkName, - *MBB->getParent()->getFunction(), Loc), + MBB->getParent()->getFunction(), Loc), MBB(MBB) {} /// MI-specific kinds of diagnostic Arguments. @@ -73,7 +73,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override { - return OptimizationRemark::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isPassedOptRemarkEnabled(getPassName()); } }; @@ -97,7 +99,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. 
bool isEnabled() const override { - return OptimizationRemarkMissed::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isMissedOptRemarkEnabled(getPassName()); } }; @@ -121,7 +125,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override { - return OptimizationRemarkAnalysis::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(getPassName()); } }; @@ -152,10 +158,25 @@ public: /// that are normally too noisy. In this mode, we can use the extra analysis /// (1) to filter trivial false positives or (2) to provide more context so /// that non-trivial false positives can be quickly detected by the user. - bool allowExtraAnalysis() const { - // For now, only allow this with -fsave-optimization-record since the -Rpass - // options are handled in the front-end. - return MF.getFunction()->getContext().getDiagnosticsOutputFile(); + bool allowExtraAnalysis(StringRef PassName) const { + return (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext() + .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); + } + + /// \brief Take a lambda that returns a remark which will be emitted. Second + /// argument is only used to restrict this to functions. + template <typename T> + void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { + // Avoid building the remark unless we know there are at least *some* + // remarks enabled. We can't currently check whether remarks are requested + // for the calling pass since that requires actually building the remark. + + if (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + auto R = RemarkBuilder(); + emit((DiagnosticInfoOptimizationBase &)R); + } } private: diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index d29d2d85cb0a5..c6a41598ce32c 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -37,7 +37,7 @@ public: FunctionPass *createMachinePostDominatorTreePass(); - const std::vector<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return DT->getRoots(); } diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 5ef0ac90e3c2a..3be94f8021701 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -27,9 +27,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstddef> #include <cstdint> @@ -84,14 +84,15 @@ private: /// all registers that were disabled are removed from the list. SmallVector<MCPhysReg, 16> UpdatedCSRs; - /// RegAllocHints - This vector records register allocation hints for virtual - /// registers. For each virtual register, it keeps a register and hint type - /// pair making up the allocation hint. 
Hint type is target specific except - /// for the value 0 which means the second value of the pair is the preferred - /// register for allocation. For example, if the hint is <0, 1024>, it means - /// the allocator should prefer the physical register allocated to the virtual - /// register of the hint. - IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints; + /// RegAllocHints - This vector records register allocation hints for + /// virtual registers. For each virtual register, it keeps a pair of hint + /// type and hints vector making up the allocation hints. Only the first + /// hint may be target specific, and in that case this is reflected by the + /// first member of the pair being non-zero. If the hinted register is + /// virtual, it means the allocator should prefer the physical register + /// allocated to it if any. + IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>, + VirtReg2IndexFunctor> RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -575,14 +576,16 @@ public: /// preserve conservative kill flag information. void clearKillFlags(unsigned Reg) const; -#ifndef NDEBUG void dumpUses(unsigned RegNo) const; -#endif /// Returns true if PhysReg is unallocatable and constant throughout the /// function. Writing to a constant register has no effect. bool isConstantPhysReg(unsigned PhysReg) const; + /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg + /// returns true. This is a utility member function. + bool isCallerPreservedOrConstPhysReg(unsigned PhysReg) const; + /// Get an iterator over the pressure sets affected by the given physical or /// virtual register. If RegUnit is physical, it must be a register unit (from /// MCRegUnitIterator). @@ -704,35 +707,61 @@ public: void clearVirtRegs(); /// setRegAllocationHint - Specify a register allocation hint for the - /// specified virtual register. + /// specified virtual register. This is typically used by target, and in case + /// of an earlier hint it will be overwritten. void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { assert(TargetRegisterInfo::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; - RegAllocHints[VReg].second = PrefReg; + RegAllocHints[VReg].second.clear(); + RegAllocHints[VReg].second.push_back(PrefReg); } - /// Specify the preferred register allocation hint for the specified virtual - /// register. + /// addRegAllocationHint - Add a register allocation hint to the hints + /// vector for VReg. + void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + RegAllocHints[VReg].second.push_back(PrefReg); + } + + /// Specify the preferred (target independent) register allocation hint for + /// the specified virtual register. void setSimpleHint(unsigned VReg, unsigned PrefReg) { setRegAllocationHint(VReg, /*Type=*/0, PrefReg); } + void clearSimpleHint(unsigned VReg) { + assert (RegAllocHints[VReg].first == 0 && + "Expected to clear a non-target hint!"); + RegAllocHints[VReg].second.clear(); + } + /// getRegAllocationHint - Return the register allocation hint for the - /// specified virtual register. + /// specified virtual register. If there are many hints, this returns the + /// one with the greatest weight. 
std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); - return RegAllocHints[VReg]; + unsigned BestHint = (RegAllocHints[VReg].second.size() ? + RegAllocHints[VReg].second[0] : 0); + return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint); } - /// getSimpleHint - Return the preferred register allocation hint, or 0 if a - /// standard simple hint (Type == 0) is not set. + /// getSimpleHint - same as getRegAllocationHint except it will only return + /// a target independent hint. unsigned getSimpleHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } + /// getRegAllocationHints - Return a reference to the vector of all + /// register allocation hints for VReg. + const std::pair<unsigned, SmallVector<unsigned, 4>> + &getRegAllocationHints(unsigned VReg) const { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + return RegAllocHints[VReg]; + } + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. @@ -844,6 +873,10 @@ public: livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + ArrayRef<std::pair<unsigned, unsigned>> liveins() const { + return LiveIns; + } + bool isLiveIn(unsigned Reg) const; /// getLiveInPhysReg - If VReg is a live-in virtual register, return the diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h index 50a7d90bf25b0..b5ea2080444d5 100644 --- a/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/include/llvm/CodeGen/MachineSSAUpdater.h @@ -1,4 +1,4 @@ -//===-- MachineSSAUpdater.h - Unstructured SSA Update Tool ------*- C++ -*-===// +//===- MachineSSAUpdater.h - Unstructured SSA Update Tool -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,17 @@ #ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H #define LLVM_CODEGEN_MACHINESSAUPDATER_H -#include "llvm/Support/Compiler.h" - namespace llvm { - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class MachineOperand; - class MachineRegisterInfo; - class TargetInstrInfo; - class TargetRegisterClass; - template<typename T> class SmallVectorImpl; - template<typename T> class SSAUpdaterTraits; + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterClass; +template<typename T> class SmallVectorImpl; +template<typename T> class SSAUpdaterTraits; /// MachineSSAUpdater - This class updates SSA form for a set of virtual /// registers defined in multiple blocks. This is used when code duplication @@ -38,7 +37,7 @@ private: /// AvailableVals - This keeps track of which value to use on a per-block /// basis. When we insert PHI nodes, we keep track of them here. //typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy; - void *AV; + void *AV = nullptr; /// VR - Current virtual register whose uses are being updated. unsigned VR; @@ -52,11 +51,14 @@ private: const TargetInstrInfo *TII; MachineRegisterInfo *MRI; + public: /// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be /// filled in with all PHI Nodes created by rewriting. 
explicit MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *InsertedPHIs = nullptr); + MachineSSAUpdater(const MachineSSAUpdater &) = delete; + MachineSSAUpdater &operator=(const MachineSSAUpdater &) = delete; ~MachineSSAUpdater(); /// Initialize - Reset this object to get ready for a new set of SSA @@ -93,7 +95,6 @@ public: /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. - /// unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB); /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, @@ -105,11 +106,8 @@ public: private: unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB); - - void operator=(const MachineSSAUpdater&) = delete; - MachineSSAUpdater(const MachineSSAUpdater&) = delete; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINESSAUPDATER_H diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 8590b7a348cfc..e327881de13aa 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -26,7 +26,7 @@ // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list // scheduling while updating the instruction stream, register pressure, and live // intervals. Most targets don't need to override the DAG builder and list -// schedulier, but subtargets that require custom scheduling heuristics may +// scheduler, but subtargets that require custom scheduling heuristics may // plugin an alternate MachineSchedStrategy. The strategy is responsible for // selecting the highest priority node from the list: // @@ -214,9 +214,20 @@ public: /// This has to be enabled in combination with shouldTrackPressure(). virtual bool shouldTrackLaneMasks() const { return false; } + // If this method returns true, handling of the scheduling regions + // themselves (in case of a scheduling boundary in MBB) will be done + // beginning with the topmost region of MBB. + virtual bool doMBBSchedRegionsTopDown() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; + /// Tell the strategy that MBB is about to be processed. + virtual void enterMBB(MachineBasicBlock *MBB) {}; + + /// Tell the strategy that current MBB is done. + virtual void leaveMBB() {}; + /// Notify this strategy that all roots have been released (including those /// that depend on EntrySU or ExitSU). virtual void registerRoots() {} @@ -284,6 +295,13 @@ public: // Provide a vtable anchor ~ScheduleDAGMI() override; + /// If this method returns true, handling of the scheduling regions + /// themselves (in case of a scheduling boundary in MBB) will be done + /// beginning with the topmost region of MBB. + bool doMBBSchedRegionsTopDown() const override { + return SchedImpl->doMBBSchedRegionsTopDown(); + } + // Returns LiveIntervals instance for use in DAG mutators and such. LiveIntervals *getLIS() const { return LIS; } @@ -326,6 +344,9 @@ public: /// reorderable instructions. void schedule() override; + void startBlock(MachineBasicBlock *bb) override; + void finishBlock() override; + /// Change the position of an instruction within the basic block and update /// live ranges and region boundary iterators. 
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); @@ -755,9 +776,7 @@ public: /// available instruction, or NULL if there are multiple candidates. SUnit *pickOnlyChoice(); -#ifndef NDEBUG void dumpScheduledState() const; -#endif }; /// Base class for GenericScheduler. This class maintains information about diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h index 284f8c1976076..9d8db393ca92a 100644 --- a/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/include/llvm/CodeGen/MachineTraceMetrics.h @@ -47,17 +47,18 @@ #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H #define LLVM_CODEGEN_MACHINETRACEMETRICS_H +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetSchedule.h" namespace llvm { class AnalysisUsage; -class MachineBasicBlock; class MachineFunction; class MachineInstr; class MachineLoop; @@ -68,6 +69,22 @@ class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle = 0; + const MachineInstr *MI = nullptr; + unsigned Op = 0; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU) {} +}; + + class MachineTraceMetrics : public MachineFunctionPass { const MachineFunction *MF = nullptr; const TargetInstrInfo *TII = nullptr; @@ -343,6 +360,18 @@ public: /// Get the trace that passes through MBB. /// The trace is computed on demand. Trace getTrace(const MachineBasicBlock *MBB); + + /// Updates the depth of a machine instruction, given RegUnits. + void updateDepth(TraceBlockInfo &TBI, const MachineInstr&, + SparseSet<LiveRegUnit> &RegUnits); + void updateDepth(const MachineBasicBlock *, const MachineInstr&, + SparseSet<LiveRegUnit> &RegUnits); + + /// Updates the depth of the instructions from Start to End. + void updateDepths(MachineBasicBlock::iterator Start, + MachineBasicBlock::iterator End, + SparseSet<LiveRegUnit> &RegUnits); + }; /// Strategies for selecting traces.
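As an aside, the MachineScheduler.h hunk above adds three per-block hooks to MachineSchedStrategy (doMBBSchedRegionsTopDown, enterMBB and leaveMBB). Below is a minimal sketch, not part of this commit, of how a target-specific strategy might use them; it assumes the header as modified here, and the class and member names (TopDownRegionStrategy, CurMBB) are invented purely for illustration.

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

namespace {
// Hypothetical strategy: reuse GenericScheduler's heuristics, but ask the
// scheduler driver to visit the regions of each basic block top-down and
// keep track of which block is currently being scheduled.
class TopDownRegionStrategy : public GenericScheduler {
public:
  TopDownRegionStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

  // Process the scheduling regions of an MBB starting with its topmost region.
  bool doMBBSchedRegionsTopDown() const override { return true; }

  // Called by the driver when it enters and leaves a basic block.
  void enterMBB(MachineBasicBlock *MBB) override { CurMBB = MBB; }
  void leaveMBB() override { CurMBB = nullptr; }

private:
  MachineBasicBlock *CurMBB = nullptr;
};
} // end anonymous namespace

A target would typically hand such a strategy to the scheduler through its usual creation hook (for example TargetPassConfig::createMachineScheduler); nothing in the sketch relies on anything beyond the three new virtual methods.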
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index 0bdb38bfcbec8..b452684757f63 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -64,80 +64,81 @@ namespace llvm { v16i1 = 18, // 16 x i1 v32i1 = 19, // 32 x i1 v64i1 = 20, // 64 x i1 - v512i1 = 21, // 512 x i1 - v1024i1 = 22, // 1024 x i1 - - v1i8 = 23, // 1 x i8 - v2i8 = 24, // 2 x i8 - v4i8 = 25, // 4 x i8 - v8i8 = 26, // 8 x i8 - v16i8 = 27, // 16 x i8 - v32i8 = 28, // 32 x i8 - v64i8 = 29, // 64 x i8 - v128i8 = 30, //128 x i8 - v256i8 = 31, //256 x i8 - - v1i16 = 32, // 1 x i16 - v2i16 = 33, // 2 x i16 - v4i16 = 34, // 4 x i16 - v8i16 = 35, // 8 x i16 - v16i16 = 36, // 16 x i16 - v32i16 = 37, // 32 x i16 - v64i16 = 38, // 64 x i16 - v128i16 = 39, //128 x i16 - - v1i32 = 40, // 1 x i32 - v2i32 = 41, // 2 x i32 - v4i32 = 42, // 4 x i32 - v8i32 = 43, // 8 x i32 - v16i32 = 44, // 16 x i32 - v32i32 = 45, // 32 x i32 - v64i32 = 46, // 64 x i32 - - v1i64 = 47, // 1 x i64 - v2i64 = 48, // 2 x i64 - v4i64 = 49, // 4 x i64 - v8i64 = 50, // 8 x i64 - v16i64 = 51, // 16 x i64 - v32i64 = 52, // 32 x i64 - - v1i128 = 53, // 1 x i128 + v128i1 = 21, // 128 x i1 + v512i1 = 22, // 512 x i1 + v1024i1 = 23, // 1024 x i1 + + v1i8 = 24, // 1 x i8 + v2i8 = 25, // 2 x i8 + v4i8 = 26, // 4 x i8 + v8i8 = 27, // 8 x i8 + v16i8 = 28, // 16 x i8 + v32i8 = 29, // 32 x i8 + v64i8 = 30, // 64 x i8 + v128i8 = 31, //128 x i8 + v256i8 = 32, //256 x i8 + + v1i16 = 33, // 1 x i16 + v2i16 = 34, // 2 x i16 + v4i16 = 35, // 4 x i16 + v8i16 = 36, // 8 x i16 + v16i16 = 37, // 16 x i16 + v32i16 = 38, // 32 x i16 + v64i16 = 39, // 64 x i16 + v128i16 = 40, //128 x i16 + + v1i32 = 41, // 1 x i32 + v2i32 = 42, // 2 x i32 + v4i32 = 43, // 4 x i32 + v8i32 = 44, // 8 x i32 + v16i32 = 45, // 16 x i32 + v32i32 = 46, // 32 x i32 + v64i32 = 47, // 64 x i32 + + v1i64 = 48, // 1 x i64 + v2i64 = 49, // 2 x i64 + v4i64 = 50, // 4 x i64 + v8i64 = 51, // 8 x i64 + v16i64 = 52, // 16 x i64 + v32i64 = 53, // 32 x i64 + + v1i128 = 54, // 1 x i128 // Scalable integer types - nxv1i1 = 54, // n x 1 x i1 - nxv2i1 = 55, // n x 2 x i1 - nxv4i1 = 56, // n x 4 x i1 - nxv8i1 = 57, // n x 8 x i1 - nxv16i1 = 58, // n x 16 x i1 - nxv32i1 = 59, // n x 32 x i1 - - nxv1i8 = 60, // n x 1 x i8 - nxv2i8 = 61, // n x 2 x i8 - nxv4i8 = 62, // n x 4 x i8 - nxv8i8 = 63, // n x 8 x i8 - nxv16i8 = 64, // n x 16 x i8 - nxv32i8 = 65, // n x 32 x i8 - - nxv1i16 = 66, // n x 1 x i16 - nxv2i16 = 67, // n x 2 x i16 - nxv4i16 = 68, // n x 4 x i16 - nxv8i16 = 69, // n x 8 x i16 - nxv16i16 = 70, // n x 16 x i16 - nxv32i16 = 71, // n x 32 x i16 - - nxv1i32 = 72, // n x 1 x i32 - nxv2i32 = 73, // n x 2 x i32 - nxv4i32 = 74, // n x 4 x i32 - nxv8i32 = 75, // n x 8 x i32 - nxv16i32 = 76, // n x 16 x i32 - nxv32i32 = 77, // n x 32 x i32 - - nxv1i64 = 78, // n x 1 x i64 - nxv2i64 = 79, // n x 2 x i64 - nxv4i64 = 80, // n x 4 x i64 - nxv8i64 = 81, // n x 8 x i64 - nxv16i64 = 82, // n x 16 x i64 - nxv32i64 = 83, // n x 32 x i64 + nxv1i1 = 55, // n x 1 x i1 + nxv2i1 = 56, // n x 2 x i1 + nxv4i1 = 57, // n x 4 x i1 + nxv8i1 = 58, // n x 8 x i1 + nxv16i1 = 59, // n x 16 x i1 + nxv32i1 = 60, // n x 32 x i1 + + nxv1i8 = 61, // n x 1 x i8 + nxv2i8 = 62, // n x 2 x i8 + nxv4i8 = 63, // n x 4 x i8 + nxv8i8 = 64, // n x 8 x i8 + nxv16i8 = 65, // n x 16 x i8 + nxv32i8 = 66, // n x 32 x i8 + + nxv1i16 = 67, // n x 1 x i16 + nxv2i16 = 68, // n x 2 x i16 + nxv4i16 = 69, // n x 4 x i16 + nxv8i16 = 70, // n x 8 x i16 + nxv16i16 = 71, // n x 16 x i16 + 
nxv32i16 = 72, // n x 32 x i16 + + nxv1i32 = 73, // n x 1 x i32 + nxv2i32 = 74, // n x 2 x i32 + nxv4i32 = 75, // n x 4 x i32 + nxv8i32 = 76, // n x 8 x i32 + nxv16i32 = 77, // n x 16 x i32 + nxv32i32 = 78, // n x 32 x i32 + + nxv1i64 = 79, // n x 1 x i64 + nxv2i64 = 80, // n x 2 x i64 + nxv4i64 = 81, // n x 4 x i64 + nxv8i64 = 82, // n x 8 x i64 + nxv16i64 = 83, // n x 16 x i64 + nxv32i64 = 84, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, @@ -145,31 +146,31 @@ namespace llvm { FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - v2f16 = 84, // 2 x f16 - v4f16 = 85, // 4 x f16 - v8f16 = 86, // 8 x f16 - v1f32 = 87, // 1 x f32 - v2f32 = 88, // 2 x f32 - v4f32 = 89, // 4 x f32 - v8f32 = 90, // 8 x f32 - v16f32 = 91, // 16 x f32 - v1f64 = 92, // 1 x f64 - v2f64 = 93, // 2 x f64 - v4f64 = 94, // 4 x f64 - v8f64 = 95, // 8 x f64 - - nxv2f16 = 96, // n x 2 x f16 - nxv4f16 = 97, // n x 4 x f16 - nxv8f16 = 98, // n x 8 x f16 - nxv1f32 = 99, // n x 1 x f32 - nxv2f32 = 100, // n x 2 x f32 - nxv4f32 = 101, // n x 4 x f32 - nxv8f32 = 102, // n x 8 x f32 - nxv16f32 = 103, // n x 16 x f32 - nxv1f64 = 104, // n x 1 x f64 - nxv2f64 = 105, // n x 2 x f64 - nxv4f64 = 106, // n x 4 x f64 - nxv8f64 = 107, // n x 8 x f64 + v2f16 = 85, // 2 x f16 + v4f16 = 86, // 4 x f16 + v8f16 = 87, // 8 x f16 + v1f32 = 88, // 1 x f32 + v2f32 = 89, // 2 x f32 + v4f32 = 90, // 4 x f32 + v8f32 = 91, // 8 x f32 + v16f32 = 92, // 16 x f32 + v1f64 = 93, // 1 x f64 + v2f64 = 94, // 2 x f64 + v4f64 = 95, // 4 x f64 + v8f64 = 96, // 8 x f64 + + nxv2f16 = 97, // n x 2 x f16 + nxv4f16 = 98, // n x 4 x f16 + nxv8f16 = 99, // n x 8 x f16 + nxv1f32 = 100, // n x 1 x f32 + nxv2f32 = 101, // n x 2 x f32 + nxv4f32 = 102, // n x 4 x f32 + nxv8f32 = 103, // n x 8 x f32 + nxv16f32 = 104, // n x 16 x f32 + nxv1f64 = 105, // n x 1 x f64 + nxv2f64 = 106, // n x 2 x f64 + nxv4f64 = 107, // n x 4 x f64 + nxv8f64 = 108, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -180,18 +181,18 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 108, // This is an X86 MMX value + x86mmx = 109, // This is an X86 MMX value - Glue = 109, // This glues nodes together during pre-RA sched + Glue = 110, // This glues nodes together during pre-RA sched - isVoid = 110, // This has no value + isVoid = 111, // This has no value - Untyped = 111, // This value takes a register, but has + Untyped = 112, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 112, // This always remains at the end of the list. + LAST_VALUETYPE = 113, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -346,10 +347,11 @@ namespace llvm { /// Return true if this is a 128-bit vector type. 
bool is128BitVector() const { - return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 || - SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 || - SimpleTy == MVT::v1i128 || SimpleTy == MVT::v8f16 || - SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64); + return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || + SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || + SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || + SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 || + SimpleTy == MVT::v2f64); } /// Return true if this is a 256-bit vector type. @@ -420,6 +422,7 @@ namespace llvm { case v16i1: case v32i1: case v64i1: + case v128i1: case v512i1: case v1024i1: case nxv1i1: @@ -517,6 +520,7 @@ namespace llvm { case v1024i1: return 1024; case v512i1: return 512; case v256i8: return 256; + case v128i1: case v128i8: case v128i16: return 128; case v64i1: @@ -690,6 +694,7 @@ namespace llvm { case f128: case ppcf128: case i128: + case v128i1: case v16i8: case v8i16: case v4i32: @@ -828,6 +833,7 @@ namespace llvm { if (NumElements == 16) return MVT::v16i1; if (NumElements == 32) return MVT::v32i1; if (NumElements == 64) return MVT::v64i1; + if (NumElements == 128) return MVT::v128i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index 8d5d2374679d3..6a247277fdfab 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -29,11 +29,6 @@ namespace PBQP { using SelectionsMap = std::map<GraphBase::NodeId, unsigned>; SelectionsMap selections; - unsigned r0Reductions = 0; - unsigned r1Reductions = 0; - unsigned r2Reductions = 0; - unsigned rNReductions = 0; - public: /// \brief Initialise an empty solution. Solution() = default; diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 96cfce5b84dfe..4370d116e08c3 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -43,9 +43,6 @@ namespace llvm { /// the entry block. FunctionPass *createUnreachableBlockEliminationPass(); - /// Insert mcount-like function calls. - FunctionPass *createCountingFunctionInserterPass(); - /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. MachineFunctionPass * @@ -409,17 +406,17 @@ namespace llvm { /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); - /// This pass combine basic blocks guarded by the same branch. - extern char &BranchCoalescingID; - /// This pass performs outlining on machine instructions directly before /// printing assembly. - ModulePass *createMachineOutlinerPass(); + ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs = false); /// This pass expands the experimental reduction intrinsics into sequences of /// shuffles. FunctionPass *createExpandReductionsPass(); + // This pass expands memcmp() to load/stores. 
+ FunctionPass *createExpandMemCmpPass(); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/include/llvm/CodeGen/PreISelIntrinsicLowering.h index 765ca085244aa..7a007eb8bceac 100644 --- a/include/llvm/CodeGen/PreISelIntrinsicLowering.h +++ b/include/llvm/CodeGen/PreISelIntrinsicLowering.h @@ -1,4 +1,4 @@ -//===--- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ----===// +//===- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ------===// // // The LLVM Compiler Infrastructure // @@ -17,10 +17,13 @@ namespace llvm { +class Module; + struct PreISelIntrinsicLoweringPass : PassInfoMixin<PreISelIntrinsicLoweringPass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_CODEGEN_PREISELINTRINSICLOWERING_H diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index f5aedb07e4d2b..bdf0bb7315402 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -25,6 +25,7 @@ namespace llvm { class MachineFrameInfo; class MachineMemOperand; class raw_ostream; +class TargetInstrInfo; raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); class PseudoSourceValue; @@ -48,6 +49,7 @@ public: private: PSVKind Kind; + unsigned AddressSpace; friend raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV); @@ -58,7 +60,7 @@ private: virtual void printCustom(raw_ostream &O) const; public: - explicit PseudoSourceValue(PSVKind Kind); + explicit PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII); virtual ~PseudoSourceValue(); @@ -68,6 +70,9 @@ public: bool isGOT() const { return Kind == GOT; } bool isConstantPool() const { return Kind == ConstantPool; } bool isJumpTable() const { return Kind == JumpTable; } + + unsigned getAddressSpace() const { return AddressSpace; } + unsigned getTargetCustom() const { return (Kind >= TargetCustom) ? ((Kind+1) - TargetCustom) : 0; } @@ -91,8 +96,8 @@ class FixedStackPseudoSourceValue : public PseudoSourceValue { const int FI; public: - explicit FixedStackPseudoSourceValue(int FI) - : PseudoSourceValue(FixedStack), FI(FI) {} + explicit FixedStackPseudoSourceValue(int FI, const TargetInstrInfo &TII) + : PseudoSourceValue(FixedStack, TII), FI(FI) {} static bool classof(const PseudoSourceValue *V) { return V->kind() == FixedStack; @@ -111,7 +116,7 @@ public: class CallEntryPseudoSourceValue : public PseudoSourceValue { protected: - CallEntryPseudoSourceValue(PSVKind Kind); + CallEntryPseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII); public: bool isConstant(const MachineFrameInfo *) const override; @@ -124,7 +129,8 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue { const GlobalValue *GV; public: - GlobalValuePseudoSourceValue(const GlobalValue *GV); + GlobalValuePseudoSourceValue(const GlobalValue *GV, + const TargetInstrInfo &TII); static bool classof(const PseudoSourceValue *V) { return V->kind() == GlobalValueCallEntry; @@ -138,7 +144,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue { const char *ES; public: - ExternalSymbolPseudoSourceValue(const char *ES); + ExternalSymbolPseudoSourceValue(const char *ES, const TargetInstrInfo &TII); static bool classof(const PseudoSourceValue *V) { return V->kind() == ExternalSymbolCallEntry; @@ -149,6 +155,7 @@ public: /// Manages creation of pseudo source values. 
class PseudoSourceValueManager { + const TargetInstrInfo &TII; const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues; StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>> @@ -158,7 +165,7 @@ class PseudoSourceValueManager { GlobalCallEntries; public: - PseudoSourceValueManager(); + PseudoSourceValueManager(const TargetInstrInfo &TII); /// Return a pseudo source value referencing the area below the stack frame of /// a function, e.g., the argument space. diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 355c9f9b2f1e6..97113c575815b 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -20,8 +20,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <memory> diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index e997aaf269e31..2b14b78d621d7 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -20,8 +20,8 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstddef> #include <cstdint> diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index 0a04bc6a89f4d..eabadd8d784a8 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -20,6 +20,7 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include <cstdint> #include <vector> diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 9c8f5f487d382..03166ccdfe384 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -20,15 +20,15 @@ #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ResourcePriorityQueue; /// Sorting functions for the Available queue. - struct resource_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct resource_sort { ResourcePriorityQueue *PQ; explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {} diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def new file mode 100644 index 0000000000000..e042ae982e86c --- /dev/null +++ b/include/llvm/CodeGen/RuntimeLibcalls.def @@ -0,0 +1,492 @@ +//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines all of the runtime library calls the backend can emit. +// The various long double types cannot be merged, because 80-bit library +// functions use "xf" and 128-bit use "tf". +// +// When adding PPCF128 functions here, note that their names generally need +// to be overridden for Darwin with the xxx$LDBL128 form. See +// PPCISelLowering.cpp. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +// Provide definitions of macros so that users of this file do not have to +// define everything to use it... + +// Declare the enumerator for each libcall, along with its default name. Some +// libcalls have different names on particular OSes or architectures. These +// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets +// using TargetLoweringBase::setLibcallName() +#ifndef HANDLE_LIBCALL +#error "HANDLE_LIBCALL must be defined" +#endif + +// Integer +HANDLE_LIBCALL(SHL_I16, "__ashlhi3") +HANDLE_LIBCALL(SHL_I32, "__ashlsi3") +HANDLE_LIBCALL(SHL_I64, "__ashldi3") +HANDLE_LIBCALL(SHL_I128, "__ashlti3") +HANDLE_LIBCALL(SRL_I16, "__lshrhi3") +HANDLE_LIBCALL(SRL_I32, "__lshrsi3") +HANDLE_LIBCALL(SRL_I64, "__lshrdi3") +HANDLE_LIBCALL(SRL_I128, "__lshrti3") +HANDLE_LIBCALL(SRA_I16, "__ashrhi3") +HANDLE_LIBCALL(SRA_I32, "__ashrsi3") +HANDLE_LIBCALL(SRA_I64, "__ashrdi3") +HANDLE_LIBCALL(SRA_I128, "__ashrti3") +HANDLE_LIBCALL(MUL_I8, "__mulqi3") +HANDLE_LIBCALL(MUL_I16, "__mulhi3") +HANDLE_LIBCALL(MUL_I32, "__mulsi3") +HANDLE_LIBCALL(MUL_I64, "__muldi3") +HANDLE_LIBCALL(MUL_I128, "__multi3") +HANDLE_LIBCALL(MULO_I32, "__mulosi4") +HANDLE_LIBCALL(MULO_I64, "__mulodi4") +HANDLE_LIBCALL(MULO_I128, "__muloti4") +HANDLE_LIBCALL(SDIV_I8, "__divqi3") +HANDLE_LIBCALL(SDIV_I16, "__divhi3") +HANDLE_LIBCALL(SDIV_I32, "__divsi3") +HANDLE_LIBCALL(SDIV_I64, "__divdi3") +HANDLE_LIBCALL(SDIV_I128, "__divti3") +HANDLE_LIBCALL(UDIV_I8, "__udivqi3") +HANDLE_LIBCALL(UDIV_I16, "__udivhi3") +HANDLE_LIBCALL(UDIV_I32, "__udivsi3") +HANDLE_LIBCALL(UDIV_I64, "__udivdi3") +HANDLE_LIBCALL(UDIV_I128, "__udivti3") +HANDLE_LIBCALL(SREM_I8, "__modqi3") +HANDLE_LIBCALL(SREM_I16, "__modhi3") +HANDLE_LIBCALL(SREM_I32, "__modsi3") +HANDLE_LIBCALL(SREM_I64, "__moddi3") +HANDLE_LIBCALL(SREM_I128, "__modti3") +HANDLE_LIBCALL(UREM_I8, "__umodqi3") +HANDLE_LIBCALL(UREM_I16, "__umodhi3") +HANDLE_LIBCALL(UREM_I32, "__umodsi3") +HANDLE_LIBCALL(UREM_I64, "__umoddi3") +HANDLE_LIBCALL(UREM_I128, "__umodti3") +HANDLE_LIBCALL(SDIVREM_I8, nullptr) +HANDLE_LIBCALL(SDIVREM_I16, nullptr) +HANDLE_LIBCALL(SDIVREM_I32, nullptr) +HANDLE_LIBCALL(SDIVREM_I64, nullptr) +HANDLE_LIBCALL(SDIVREM_I128, nullptr) +HANDLE_LIBCALL(UDIVREM_I8, nullptr) +HANDLE_LIBCALL(UDIVREM_I16, nullptr) +HANDLE_LIBCALL(UDIVREM_I32, nullptr) +HANDLE_LIBCALL(UDIVREM_I64, nullptr) +HANDLE_LIBCALL(UDIVREM_I128, nullptr) +HANDLE_LIBCALL(NEG_I32, "__negsi2") +HANDLE_LIBCALL(NEG_I64, "__negdi2") + +// Floating-point +HANDLE_LIBCALL(ADD_F32, "__addsf3") +HANDLE_LIBCALL(ADD_F64, "__adddf3") +HANDLE_LIBCALL(ADD_F80, "__addxf3") +HANDLE_LIBCALL(ADD_F128, "__addtf3") +HANDLE_LIBCALL(ADD_PPCF128, "__gcc_qadd") +HANDLE_LIBCALL(SUB_F32, "__subsf3") +HANDLE_LIBCALL(SUB_F64, "__subdf3") +HANDLE_LIBCALL(SUB_F80, "__subxf3") +HANDLE_LIBCALL(SUB_F128, "__subtf3") +HANDLE_LIBCALL(SUB_PPCF128, "__gcc_qsub") +HANDLE_LIBCALL(MUL_F32, "__mulsf3") +HANDLE_LIBCALL(MUL_F64, "__muldf3") +HANDLE_LIBCALL(MUL_F80, 
"__mulxf3") +HANDLE_LIBCALL(MUL_F128, "__multf3") +HANDLE_LIBCALL(MUL_PPCF128, "__gcc_qmul") +HANDLE_LIBCALL(DIV_F32, "__divsf3") +HANDLE_LIBCALL(DIV_F64, "__divdf3") +HANDLE_LIBCALL(DIV_F80, "__divxf3") +HANDLE_LIBCALL(DIV_F128, "__divtf3") +HANDLE_LIBCALL(DIV_PPCF128, "__gcc_qdiv") +HANDLE_LIBCALL(REM_F32, "fmodf") +HANDLE_LIBCALL(REM_F64, "fmod") +HANDLE_LIBCALL(REM_F80, "fmodl") +HANDLE_LIBCALL(REM_F128, "fmodl") +HANDLE_LIBCALL(REM_PPCF128, "fmodl") +HANDLE_LIBCALL(FMA_F32, "fmaf") +HANDLE_LIBCALL(FMA_F64, "fma") +HANDLE_LIBCALL(FMA_F80, "fmal") +HANDLE_LIBCALL(FMA_F128, "fmal") +HANDLE_LIBCALL(FMA_PPCF128, "fmal") +HANDLE_LIBCALL(POWI_F32, "__powisf2") +HANDLE_LIBCALL(POWI_F64, "__powidf2") +HANDLE_LIBCALL(POWI_F80, "__powixf2") +HANDLE_LIBCALL(POWI_F128, "__powitf2") +HANDLE_LIBCALL(POWI_PPCF128, "__powitf2") +HANDLE_LIBCALL(SQRT_F32, "sqrtf") +HANDLE_LIBCALL(SQRT_F64, "sqrt") +HANDLE_LIBCALL(SQRT_F80, "sqrtl") +HANDLE_LIBCALL(SQRT_F128, "sqrtl") +HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl") +HANDLE_LIBCALL(LOG_F32, "logf") +HANDLE_LIBCALL(LOG_F64, "log") +HANDLE_LIBCALL(LOG_F80, "logl") +HANDLE_LIBCALL(LOG_F128, "logl") +HANDLE_LIBCALL(LOG_PPCF128, "logl") +HANDLE_LIBCALL(LOG2_F32, "log2f") +HANDLE_LIBCALL(LOG2_F64, "log2") +HANDLE_LIBCALL(LOG2_F80, "log2l") +HANDLE_LIBCALL(LOG2_F128, "log2l") +HANDLE_LIBCALL(LOG2_PPCF128, "log2l") +HANDLE_LIBCALL(LOG10_F32, "log10f") +HANDLE_LIBCALL(LOG10_F64, "log10") +HANDLE_LIBCALL(LOG10_F80, "log10l") +HANDLE_LIBCALL(LOG10_F128, "log10l") +HANDLE_LIBCALL(LOG10_PPCF128, "log10l") +HANDLE_LIBCALL(EXP_F32, "expf") +HANDLE_LIBCALL(EXP_F64, "exp") +HANDLE_LIBCALL(EXP_F80, "expl") +HANDLE_LIBCALL(EXP_F128, "expl") +HANDLE_LIBCALL(EXP_PPCF128, "expl") +HANDLE_LIBCALL(EXP2_F32, "exp2f") +HANDLE_LIBCALL(EXP2_F64, "exp2") +HANDLE_LIBCALL(EXP2_F80, "exp2l") +HANDLE_LIBCALL(EXP2_F128, "exp2l") +HANDLE_LIBCALL(EXP2_PPCF128, "exp2l") +HANDLE_LIBCALL(SIN_F32, "sinf") +HANDLE_LIBCALL(SIN_F64, "sin") +HANDLE_LIBCALL(SIN_F80, "sinl") +HANDLE_LIBCALL(SIN_F128, "sinl") +HANDLE_LIBCALL(SIN_PPCF128, "sinl") +HANDLE_LIBCALL(COS_F32, "cosf") +HANDLE_LIBCALL(COS_F64, "cos") +HANDLE_LIBCALL(COS_F80, "cosl") +HANDLE_LIBCALL(COS_F128, "cosl") +HANDLE_LIBCALL(COS_PPCF128, "cosl") +HANDLE_LIBCALL(SINCOS_F32, nullptr) +HANDLE_LIBCALL(SINCOS_F64, nullptr) +HANDLE_LIBCALL(SINCOS_F80, nullptr) +HANDLE_LIBCALL(SINCOS_F128, nullptr) +HANDLE_LIBCALL(SINCOS_PPCF128, nullptr) +HANDLE_LIBCALL(POW_F32, "powf") +HANDLE_LIBCALL(POW_F64, "pow") +HANDLE_LIBCALL(POW_F80, "powl") +HANDLE_LIBCALL(POW_F128, "powl") +HANDLE_LIBCALL(POW_PPCF128, "powl") +HANDLE_LIBCALL(CEIL_F32, "ceilf") +HANDLE_LIBCALL(CEIL_F64, "ceil") +HANDLE_LIBCALL(CEIL_F80, "ceill") +HANDLE_LIBCALL(CEIL_F128, "ceill") +HANDLE_LIBCALL(CEIL_PPCF128, "ceill") +HANDLE_LIBCALL(TRUNC_F32, "truncf") +HANDLE_LIBCALL(TRUNC_F64, "trunc") +HANDLE_LIBCALL(TRUNC_F80, "truncl") +HANDLE_LIBCALL(TRUNC_F128, "truncl") +HANDLE_LIBCALL(TRUNC_PPCF128, "truncl") +HANDLE_LIBCALL(RINT_F32, "rintf") +HANDLE_LIBCALL(RINT_F64, "rint") +HANDLE_LIBCALL(RINT_F80, "rintl") +HANDLE_LIBCALL(RINT_F128, "rintl") +HANDLE_LIBCALL(RINT_PPCF128, "rintl") +HANDLE_LIBCALL(NEARBYINT_F32, "nearbyintf") +HANDLE_LIBCALL(NEARBYINT_F64, "nearbyint") +HANDLE_LIBCALL(NEARBYINT_F80, "nearbyintl") +HANDLE_LIBCALL(NEARBYINT_F128, "nearbyintl") +HANDLE_LIBCALL(NEARBYINT_PPCF128, "nearbyintl") +HANDLE_LIBCALL(ROUND_F32, "roundf") +HANDLE_LIBCALL(ROUND_F64, "round") +HANDLE_LIBCALL(ROUND_F80, "roundl") +HANDLE_LIBCALL(ROUND_F128, "roundl") +HANDLE_LIBCALL(ROUND_PPCF128, "roundl") 
+HANDLE_LIBCALL(FLOOR_F32, "floorf") +HANDLE_LIBCALL(FLOOR_F64, "floor") +HANDLE_LIBCALL(FLOOR_F80, "floorl") +HANDLE_LIBCALL(FLOOR_F128, "floorl") +HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") +HANDLE_LIBCALL(COPYSIGN_F32, "copysignf") +HANDLE_LIBCALL(COPYSIGN_F64, "copysign") +HANDLE_LIBCALL(COPYSIGN_F80, "copysignl") +HANDLE_LIBCALL(COPYSIGN_F128, "copysignl") +HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") +HANDLE_LIBCALL(FMIN_F32, "fminf") +HANDLE_LIBCALL(FMIN_F64, "fmin") +HANDLE_LIBCALL(FMIN_F80, "fminl") +HANDLE_LIBCALL(FMIN_F128, "fminl") +HANDLE_LIBCALL(FMIN_PPCF128, "fminl") +HANDLE_LIBCALL(FMAX_F32, "fmaxf") +HANDLE_LIBCALL(FMAX_F64, "fmax") +HANDLE_LIBCALL(FMAX_F80, "fmaxl") +HANDLE_LIBCALL(FMAX_F128, "fmaxl") +HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") + +// Conversion +HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") +HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") +HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") +HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") +HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") +HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") +HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") +HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") +HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") +HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2") +HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2") +HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2") +HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos") +HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") +HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") +HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") +HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") +HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") +HANDLE_LIBCALL(FPTOSINT_F64_I32, "__fixdfsi") +HANDLE_LIBCALL(FPTOSINT_F64_I64, "__fixdfdi") +HANDLE_LIBCALL(FPTOSINT_F64_I128, "__fixdfti") +HANDLE_LIBCALL(FPTOSINT_F80_I32, "__fixxfsi") +HANDLE_LIBCALL(FPTOSINT_F80_I64, "__fixxfdi") +HANDLE_LIBCALL(FPTOSINT_F80_I128, "__fixxfti") +HANDLE_LIBCALL(FPTOSINT_F128_I32, "__fixtfsi") +HANDLE_LIBCALL(FPTOSINT_F128_I64, "__fixtfdi") +HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti") +HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi") +HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi") +HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti") +HANDLE_LIBCALL(FPTOUINT_F64_I32, "__fixunsdfsi") +HANDLE_LIBCALL(FPTOUINT_F64_I64, "__fixunsdfdi") +HANDLE_LIBCALL(FPTOUINT_F64_I128, "__fixunsdfti") +HANDLE_LIBCALL(FPTOUINT_F80_I32, "__fixunsxfsi") +HANDLE_LIBCALL(FPTOUINT_F80_I64, "__fixunsxfdi") +HANDLE_LIBCALL(FPTOUINT_F80_I128, "__fixunsxfti") +HANDLE_LIBCALL(FPTOUINT_F128_I32, "__fixunstfsi") +HANDLE_LIBCALL(FPTOUINT_F128_I64, "__fixunstfdi") +HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti") +HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf") +HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf") +HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf") +HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf") +HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq") +HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf") +HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf") +HANDLE_LIBCALL(SINTTOFP_I64_F80, 
"__floatdixf") +HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf") +HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf") +HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf") +HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf") +HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf") +HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf") +HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf") +HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf") +HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf") +HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf") +HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf") +HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq") +HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf") +HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf") +HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf") +HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf") +HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf") +HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf") +HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") +HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") +HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") +HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") + +// Comparison +HANDLE_LIBCALL(OEQ_F32, "__eqsf2") +HANDLE_LIBCALL(OEQ_F64, "__eqdf2") +HANDLE_LIBCALL(OEQ_F128, "__eqtf2") +HANDLE_LIBCALL(OEQ_PPCF128, "__gcc_qeq") +HANDLE_LIBCALL(UNE_F32, "__nesf2") +HANDLE_LIBCALL(UNE_F64, "__nedf2") +HANDLE_LIBCALL(UNE_F128, "__netf2") +HANDLE_LIBCALL(UNE_PPCF128, "__gcc_qne") +HANDLE_LIBCALL(OGE_F32, "__gesf2") +HANDLE_LIBCALL(OGE_F64, "__gedf2") +HANDLE_LIBCALL(OGE_F128, "__getf2") +HANDLE_LIBCALL(OGE_PPCF128, "__gcc_qge") +HANDLE_LIBCALL(OLT_F32, "__ltsf2") +HANDLE_LIBCALL(OLT_F64, "__ltdf2") +HANDLE_LIBCALL(OLT_F128, "__lttf2") +HANDLE_LIBCALL(OLT_PPCF128, "__gcc_qlt") +HANDLE_LIBCALL(OLE_F32, "__lesf2") +HANDLE_LIBCALL(OLE_F64, "__ledf2") +HANDLE_LIBCALL(OLE_F128, "__letf2") +HANDLE_LIBCALL(OLE_PPCF128, "__gcc_qle") +HANDLE_LIBCALL(OGT_F32, "__gtsf2") +HANDLE_LIBCALL(OGT_F64, "__gtdf2") +HANDLE_LIBCALL(OGT_F128, "__gttf2") +HANDLE_LIBCALL(OGT_PPCF128, "__gcc_qgt") +HANDLE_LIBCALL(UO_F32, "__unordsf2") +HANDLE_LIBCALL(UO_F64, "__unorddf2") +HANDLE_LIBCALL(UO_F128, "__unordtf2") +HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord") +HANDLE_LIBCALL(O_F32, "__unordsf2") +HANDLE_LIBCALL(O_F64, "__unorddf2") +HANDLE_LIBCALL(O_F128, "__unordtf2") +HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord") + +// Memory +HANDLE_LIBCALL(MEMCPY, "memcpy") +HANDLE_LIBCALL(MEMMOVE, "memmove") +HANDLE_LIBCALL(MEMSET, "memset") + +// Element-wise unordered-atomic memory of different sizes +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memcpy_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memcpy_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memcpy_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memcpy_element_unordered_atomic_16") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memmove_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memmove_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memmove_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memmove_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memmove_element_unordered_atomic_16") 
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memset_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memset_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memset_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memset_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unordered_atomic_16") + +// Exception handling +HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume") + +// Note: there are two sets of atomics libcalls; see +// <https://llvm.org/docs/Atomics.html> for more info on the +// difference between them. + +// Atomic '__sync_*' libcalls. +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_1, "__sync_val_compare_and_swap_1") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_2, "__sync_val_compare_and_swap_2") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_4, "__sync_val_compare_and_swap_4") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_8, "__sync_val_compare_and_swap_8") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_16, "__sync_val_compare_and_swap_16") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_2, "__sync_fetch_and_add_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_4, "__sync_fetch_and_add_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_8, "__sync_fetch_and_add_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_16, "__sync_fetch_and_add_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_16, "__sync_fetch_and_sub_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_1, "__sync_fetch_and_and_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_2, "__sync_fetch_and_and_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_4, "__sync_fetch_and_and_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_8, "__sync_fetch_and_and_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_16, "__sync_fetch_and_and_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_1, "__sync_fetch_and_or_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_2, "__sync_fetch_and_or_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_4, "__sync_fetch_and_or_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_8, "__sync_fetch_and_or_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_16, "__sync_fetch_and_or_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_16, "__sync_fetch_and_xor_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_16, "__sync_fetch_and_nand_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_1, "__sync_fetch_and_max_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_2, "__sync_fetch_and_max_2") 
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_4, "__sync_fetch_and_max_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_8, "__sync_fetch_and_max_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_16, "__sync_fetch_and_max_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_1, "__sync_fetch_and_umax_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_2, "__sync_fetch_and_umax_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_4, "__sync_fetch_and_umax_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_8, "__sync_fetch_and_umax_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_16, "__sync_fetch_and_umax_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_1, "__sync_fetch_and_min_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_2, "__sync_fetch_and_min_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_4, "__sync_fetch_and_min_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_8, "__sync_fetch_and_min_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_16, "__sync_fetch_and_min_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_1, "__sync_fetch_and_umin_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_2, "__sync_fetch_and_umin_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_4, "__sync_fetch_and_umin_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_8, "__sync_fetch_and_umin_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_16, "__sync_fetch_and_umin_16") + +// Atomic `__atomic_*' libcalls. +HANDLE_LIBCALL(ATOMIC_LOAD, "__atomic_load") +HANDLE_LIBCALL(ATOMIC_LOAD_1, "__atomic_load_1") +HANDLE_LIBCALL(ATOMIC_LOAD_2, "__atomic_load_2") +HANDLE_LIBCALL(ATOMIC_LOAD_4, "__atomic_load_4") +HANDLE_LIBCALL(ATOMIC_LOAD_8, "__atomic_load_8") +HANDLE_LIBCALL(ATOMIC_LOAD_16, "__atomic_load_16") + +HANDLE_LIBCALL(ATOMIC_STORE, "__atomic_store") +HANDLE_LIBCALL(ATOMIC_STORE_1, "__atomic_store_1") +HANDLE_LIBCALL(ATOMIC_STORE_2, "__atomic_store_2") +HANDLE_LIBCALL(ATOMIC_STORE_4, "__atomic_store_4") +HANDLE_LIBCALL(ATOMIC_STORE_8, "__atomic_store_8") +HANDLE_LIBCALL(ATOMIC_STORE_16, "__atomic_store_16") + +HANDLE_LIBCALL(ATOMIC_EXCHANGE, "__atomic_exchange") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_1, "__atomic_exchange_1") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_2, "__atomic_exchange_2") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_4, "__atomic_exchange_4") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_8, "__atomic_exchange_8") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_16, "__atomic_exchange_16") + +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE, "__atomic_compare_exchange") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_1, "__atomic_compare_exchange_1") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_2, "__atomic_compare_exchange_2") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_4, "__atomic_compare_exchange_4") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_8, "__atomic_compare_exchange_8") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_16, "__atomic_compare_exchange_16") + +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_1, "__atomic_fetch_add_1") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_2, "__atomic_fetch_add_2") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_4, "__atomic_fetch_add_4") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_8, "__atomic_fetch_add_8") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_1, "__atomic_fetch_sub_1") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_2, "__atomic_fetch_sub_2") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_4, "__atomic_fetch_sub_4") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_8, "__atomic_fetch_sub_8") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_1, "__atomic_fetch_and_1") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_2, "__atomic_fetch_and_2") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_4, "__atomic_fetch_and_4") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_8, "__atomic_fetch_and_8") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_16, "__atomic_fetch_and_16") 
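As an aside, not part of the RuntimeLibcalls.def listing above or of this commit: the new .def file is meant to be consumed with the usual X-macro pattern, exactly as the RuntimeLibcalls.h hunk further below does to regenerate the RTLIB::Libcall enum, that is, define HANDLE_LIBCALL, include the file, then undefine it. Here is a minimal standalone sketch that builds a table of the default libcall names the same way; the array name DefaultLibcallNames is invented for this example.

#include "llvm/CodeGen/RuntimeLibcalls.h"

// Expand every HANDLE_LIBCALL(code, name) entry to its default name string
// (nullptr where no default exists), indexed by the RTLIB::Libcall value.
static const char *const DefaultLibcallNames[RTLIB::UNKNOWN_LIBCALL + 1] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/CodeGen/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
};

Because the enum and such a table are generated from the same list, they stay in sync automatically; per-target overrides of these default names are still installed separately, as the file's header comment notes.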
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_1, "__atomic_fetch_or_1") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_2, "__atomic_fetch_or_2") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_4, "__atomic_fetch_or_4") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_8, "__atomic_fetch_or_8") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_16, "__atomic_fetch_or_16") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_1, "__atomic_fetch_xor_1") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_2, "__atomic_fetch_xor_2") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_4, "__atomic_fetch_xor_4") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_8, "__atomic_fetch_xor_8") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_16, "__atomic_fetch_xor_16") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_1, "__atomic_fetch_nand_1") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_2, "__atomic_fetch_nand_2") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16") + +// Stack Protector Fail +HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail") + +// Deoptimization +HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize") + +HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) + +#undef HANDLE_LIBCALL diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 08151be110832..016bef1702c4c 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -28,471 +28,9 @@ namespace RTLIB { /// PPCISelLowering.cpp. /// enum Libcall { - // Integer - SHL_I16, - SHL_I32, - SHL_I64, - SHL_I128, - SRL_I16, - SRL_I32, - SRL_I64, - SRL_I128, - SRA_I16, - SRA_I32, - SRA_I64, - SRA_I128, - MUL_I8, - MUL_I16, - MUL_I32, - MUL_I64, - MUL_I128, - MULO_I32, - MULO_I64, - MULO_I128, - SDIV_I8, - SDIV_I16, - SDIV_I32, - SDIV_I64, - SDIV_I128, - UDIV_I8, - UDIV_I16, - UDIV_I32, - UDIV_I64, - UDIV_I128, - SREM_I8, - SREM_I16, - SREM_I32, - SREM_I64, - SREM_I128, - UREM_I8, - UREM_I16, - UREM_I32, - UREM_I64, - UREM_I128, - SDIVREM_I8, - SDIVREM_I16, - SDIVREM_I32, - SDIVREM_I64, - SDIVREM_I128, - UDIVREM_I8, - UDIVREM_I16, - UDIVREM_I32, - UDIVREM_I64, - UDIVREM_I128, - NEG_I32, - NEG_I64, - - // FLOATING POINT - ADD_F32, - ADD_F64, - ADD_F80, - ADD_F128, - ADD_PPCF128, - SUB_F32, - SUB_F64, - SUB_F80, - SUB_F128, - SUB_PPCF128, - MUL_F32, - MUL_F64, - MUL_F80, - MUL_F128, - MUL_PPCF128, - DIV_F32, - DIV_F64, - DIV_F80, - DIV_F128, - DIV_PPCF128, - REM_F32, - REM_F64, - REM_F80, - REM_F128, - REM_PPCF128, - FMA_F32, - FMA_F64, - FMA_F80, - FMA_F128, - FMA_PPCF128, - POWI_F32, - POWI_F64, - POWI_F80, - POWI_F128, - POWI_PPCF128, - SQRT_F32, - SQRT_F64, - SQRT_F80, - SQRT_F128, - SQRT_PPCF128, - LOG_F32, - LOG_F64, - LOG_F80, - LOG_F128, - LOG_PPCF128, - LOG2_F32, - LOG2_F64, - LOG2_F80, - LOG2_F128, - LOG2_PPCF128, - LOG10_F32, - LOG10_F64, - LOG10_F80, - LOG10_F128, - LOG10_PPCF128, - EXP_F32, - EXP_F64, - EXP_F80, - EXP_F128, - EXP_PPCF128, - EXP2_F32, - EXP2_F64, - EXP2_F80, - EXP2_F128, - EXP2_PPCF128, - SIN_F32, - SIN_F64, - SIN_F80, - SIN_F128, - SIN_PPCF128, - COS_F32, - COS_F64, - COS_F80, - COS_F128, - COS_PPCF128, - SINCOS_F32, - SINCOS_F64, - SINCOS_F80, - SINCOS_F128, - SINCOS_PPCF128, - POW_F32, - POW_F64, - POW_F80, - POW_F128, - POW_PPCF128, - CEIL_F32, - CEIL_F64, - CEIL_F80, - CEIL_F128, - CEIL_PPCF128, - TRUNC_F32, - TRUNC_F64, - TRUNC_F80, - TRUNC_F128, - TRUNC_PPCF128, - RINT_F32, - RINT_F64, - RINT_F80, - RINT_F128, - RINT_PPCF128, - NEARBYINT_F32, - NEARBYINT_F64, - NEARBYINT_F80, - NEARBYINT_F128, - NEARBYINT_PPCF128, - ROUND_F32, - ROUND_F64, - ROUND_F80, - ROUND_F128, - ROUND_PPCF128, - FLOOR_F32, 
- FLOOR_F64, - FLOOR_F80, - FLOOR_F128, - FLOOR_PPCF128, - COPYSIGN_F32, - COPYSIGN_F64, - COPYSIGN_F80, - COPYSIGN_F128, - COPYSIGN_PPCF128, - FMIN_F32, - FMIN_F64, - FMIN_F80, - FMIN_F128, - FMIN_PPCF128, - FMAX_F32, - FMAX_F64, - FMAX_F80, - FMAX_F128, - FMAX_PPCF128, - - // CONVERSION - FPEXT_F32_PPCF128, - FPEXT_F64_PPCF128, - FPEXT_F64_F128, - FPEXT_F32_F128, - FPEXT_F32_F64, - FPEXT_F16_F32, - FPROUND_F32_F16, - FPROUND_F64_F16, - FPROUND_F80_F16, - FPROUND_F128_F16, - FPROUND_PPCF128_F16, - FPROUND_F64_F32, - FPROUND_F80_F32, - FPROUND_F128_F32, - FPROUND_PPCF128_F32, - FPROUND_F80_F64, - FPROUND_F128_F64, - FPROUND_PPCF128_F64, - FPTOSINT_F32_I32, - FPTOSINT_F32_I64, - FPTOSINT_F32_I128, - FPTOSINT_F64_I32, - FPTOSINT_F64_I64, - FPTOSINT_F64_I128, - FPTOSINT_F80_I32, - FPTOSINT_F80_I64, - FPTOSINT_F80_I128, - FPTOSINT_F128_I32, - FPTOSINT_F128_I64, - FPTOSINT_F128_I128, - FPTOSINT_PPCF128_I32, - FPTOSINT_PPCF128_I64, - FPTOSINT_PPCF128_I128, - FPTOUINT_F32_I32, - FPTOUINT_F32_I64, - FPTOUINT_F32_I128, - FPTOUINT_F64_I32, - FPTOUINT_F64_I64, - FPTOUINT_F64_I128, - FPTOUINT_F80_I32, - FPTOUINT_F80_I64, - FPTOUINT_F80_I128, - FPTOUINT_F128_I32, - FPTOUINT_F128_I64, - FPTOUINT_F128_I128, - FPTOUINT_PPCF128_I32, - FPTOUINT_PPCF128_I64, - FPTOUINT_PPCF128_I128, - SINTTOFP_I32_F32, - SINTTOFP_I32_F64, - SINTTOFP_I32_F80, - SINTTOFP_I32_F128, - SINTTOFP_I32_PPCF128, - SINTTOFP_I64_F32, - SINTTOFP_I64_F64, - SINTTOFP_I64_F80, - SINTTOFP_I64_F128, - SINTTOFP_I64_PPCF128, - SINTTOFP_I128_F32, - SINTTOFP_I128_F64, - SINTTOFP_I128_F80, - SINTTOFP_I128_F128, - SINTTOFP_I128_PPCF128, - UINTTOFP_I32_F32, - UINTTOFP_I32_F64, - UINTTOFP_I32_F80, - UINTTOFP_I32_F128, - UINTTOFP_I32_PPCF128, - UINTTOFP_I64_F32, - UINTTOFP_I64_F64, - UINTTOFP_I64_F80, - UINTTOFP_I64_F128, - UINTTOFP_I64_PPCF128, - UINTTOFP_I128_F32, - UINTTOFP_I128_F64, - UINTTOFP_I128_F80, - UINTTOFP_I128_F128, - UINTTOFP_I128_PPCF128, - - // COMPARISON - OEQ_F32, - OEQ_F64, - OEQ_F128, - OEQ_PPCF128, - UNE_F32, - UNE_F64, - UNE_F128, - UNE_PPCF128, - OGE_F32, - OGE_F64, - OGE_F128, - OGE_PPCF128, - OLT_F32, - OLT_F64, - OLT_F128, - OLT_PPCF128, - OLE_F32, - OLE_F64, - OLE_F128, - OLE_PPCF128, - OGT_F32, - OGT_F64, - OGT_F128, - OGT_PPCF128, - UO_F32, - UO_F64, - UO_F128, - UO_PPCF128, - O_F32, - O_F64, - O_F128, - O_PPCF128, - - // MEMORY - MEMCPY, - MEMSET, - MEMMOVE, - - // ELEMENT-WISE UNORDERED-ATOMIC MEMORY of different element sizes - MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, - - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, - - MEMSET_ELEMENT_UNORDERED_ATOMIC_1, - MEMSET_ELEMENT_UNORDERED_ATOMIC_2, - MEMSET_ELEMENT_UNORDERED_ATOMIC_4, - MEMSET_ELEMENT_UNORDERED_ATOMIC_8, - MEMSET_ELEMENT_UNORDERED_ATOMIC_16, - - // EXCEPTION HANDLING - UNWIND_RESUME, - - // Note: there's two sets of atomics libcalls; see - // <http://llvm.org/docs/Atomics.html> for more info on the - // difference between them. - - // Atomic '__sync_*' libcalls. 
- SYNC_VAL_COMPARE_AND_SWAP_1, - SYNC_VAL_COMPARE_AND_SWAP_2, - SYNC_VAL_COMPARE_AND_SWAP_4, - SYNC_VAL_COMPARE_AND_SWAP_8, - SYNC_VAL_COMPARE_AND_SWAP_16, - SYNC_LOCK_TEST_AND_SET_1, - SYNC_LOCK_TEST_AND_SET_2, - SYNC_LOCK_TEST_AND_SET_4, - SYNC_LOCK_TEST_AND_SET_8, - SYNC_LOCK_TEST_AND_SET_16, - SYNC_FETCH_AND_ADD_1, - SYNC_FETCH_AND_ADD_2, - SYNC_FETCH_AND_ADD_4, - SYNC_FETCH_AND_ADD_8, - SYNC_FETCH_AND_ADD_16, - SYNC_FETCH_AND_SUB_1, - SYNC_FETCH_AND_SUB_2, - SYNC_FETCH_AND_SUB_4, - SYNC_FETCH_AND_SUB_8, - SYNC_FETCH_AND_SUB_16, - SYNC_FETCH_AND_AND_1, - SYNC_FETCH_AND_AND_2, - SYNC_FETCH_AND_AND_4, - SYNC_FETCH_AND_AND_8, - SYNC_FETCH_AND_AND_16, - SYNC_FETCH_AND_OR_1, - SYNC_FETCH_AND_OR_2, - SYNC_FETCH_AND_OR_4, - SYNC_FETCH_AND_OR_8, - SYNC_FETCH_AND_OR_16, - SYNC_FETCH_AND_XOR_1, - SYNC_FETCH_AND_XOR_2, - SYNC_FETCH_AND_XOR_4, - SYNC_FETCH_AND_XOR_8, - SYNC_FETCH_AND_XOR_16, - SYNC_FETCH_AND_NAND_1, - SYNC_FETCH_AND_NAND_2, - SYNC_FETCH_AND_NAND_4, - SYNC_FETCH_AND_NAND_8, - SYNC_FETCH_AND_NAND_16, - SYNC_FETCH_AND_MAX_1, - SYNC_FETCH_AND_MAX_2, - SYNC_FETCH_AND_MAX_4, - SYNC_FETCH_AND_MAX_8, - SYNC_FETCH_AND_MAX_16, - SYNC_FETCH_AND_UMAX_1, - SYNC_FETCH_AND_UMAX_2, - SYNC_FETCH_AND_UMAX_4, - SYNC_FETCH_AND_UMAX_8, - SYNC_FETCH_AND_UMAX_16, - SYNC_FETCH_AND_MIN_1, - SYNC_FETCH_AND_MIN_2, - SYNC_FETCH_AND_MIN_4, - SYNC_FETCH_AND_MIN_8, - SYNC_FETCH_AND_MIN_16, - SYNC_FETCH_AND_UMIN_1, - SYNC_FETCH_AND_UMIN_2, - SYNC_FETCH_AND_UMIN_4, - SYNC_FETCH_AND_UMIN_8, - SYNC_FETCH_AND_UMIN_16, - - // Atomic '__atomic_*' libcalls. - ATOMIC_LOAD, - ATOMIC_LOAD_1, - ATOMIC_LOAD_2, - ATOMIC_LOAD_4, - ATOMIC_LOAD_8, - ATOMIC_LOAD_16, - - ATOMIC_STORE, - ATOMIC_STORE_1, - ATOMIC_STORE_2, - ATOMIC_STORE_4, - ATOMIC_STORE_8, - ATOMIC_STORE_16, - - ATOMIC_EXCHANGE, - ATOMIC_EXCHANGE_1, - ATOMIC_EXCHANGE_2, - ATOMIC_EXCHANGE_4, - ATOMIC_EXCHANGE_8, - ATOMIC_EXCHANGE_16, - - ATOMIC_COMPARE_EXCHANGE, - ATOMIC_COMPARE_EXCHANGE_1, - ATOMIC_COMPARE_EXCHANGE_2, - ATOMIC_COMPARE_EXCHANGE_4, - ATOMIC_COMPARE_EXCHANGE_8, - ATOMIC_COMPARE_EXCHANGE_16, - - ATOMIC_FETCH_ADD_1, - ATOMIC_FETCH_ADD_2, - ATOMIC_FETCH_ADD_4, - ATOMIC_FETCH_ADD_8, - ATOMIC_FETCH_ADD_16, - - ATOMIC_FETCH_SUB_1, - ATOMIC_FETCH_SUB_2, - ATOMIC_FETCH_SUB_4, - ATOMIC_FETCH_SUB_8, - ATOMIC_FETCH_SUB_16, - - ATOMIC_FETCH_AND_1, - ATOMIC_FETCH_AND_2, - ATOMIC_FETCH_AND_4, - ATOMIC_FETCH_AND_8, - ATOMIC_FETCH_AND_16, - - ATOMIC_FETCH_OR_1, - ATOMIC_FETCH_OR_2, - ATOMIC_FETCH_OR_4, - ATOMIC_FETCH_OR_8, - ATOMIC_FETCH_OR_16, - - ATOMIC_FETCH_XOR_1, - ATOMIC_FETCH_XOR_2, - ATOMIC_FETCH_XOR_4, - ATOMIC_FETCH_XOR_8, - ATOMIC_FETCH_XOR_16, - - ATOMIC_FETCH_NAND_1, - ATOMIC_FETCH_NAND_2, - ATOMIC_FETCH_NAND_4, - ATOMIC_FETCH_NAND_8, - ATOMIC_FETCH_NAND_16, - - // Stack Protector Fail. - STACKPROTECTOR_CHECK_FAIL, - - // Deoptimization. 
- DEOPTIMIZE, - - UNKNOWN_LIBCALL +#define HANDLE_LIBCALL(code, name) code, + #include "RuntimeLibcalls.def" +#undef HANDLE_LIBCALL }; /// getFPEXT - Return the FPEXT_*_* value for the given types, or diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 25afc5b506df6..f3f2f05b877da 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -22,8 +22,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" #include <cassert> #include <cstddef> #include <iterator> diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 218e22e402349..14882205584e2 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -24,9 +24,9 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <list> @@ -275,6 +275,11 @@ namespace llvm { /// Returns an existing SUnit for this MI, or nullptr. SUnit *getSUnit(MachineInstr *MI) const; + /// If this method returns true, handling of the scheduling regions + /// themselves (in case of a scheduling boundary in MBB) will be done + /// beginning with the topmost region of MBB. + virtual bool doMBBSchedRegionsTopDown() const { return false; } + /// Prepares to perform scheduling in the given block. virtual void startBlock(MachineBasicBlock *BB); diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index d6851f7143a51..6a5c2db34bb10 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -211,6 +211,7 @@ class SelectionDAG { const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; MachineFunction *MF; + Pass *SDAGISelPass = nullptr; LLVMContext *Context; CodeGenOpt::Level OptLevel; @@ -335,6 +336,14 @@ private: .getRawSubclassData(); } + template <typename SDNodeTy> + static uint16_t getSyntheticNodeSubclassData(unsigned Opc, unsigned Order, + SDVTList VTs, EVT MemoryVT, + MachineMemOperand *MMO) { + return SDNodeTy(Opc, Order, DebugLoc(), VTs, MemoryVT, MMO) + .getRawSubclassData(); + } + void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { assert(!Node->OperandList && "Node already has operands"); SDUse *Ops = OperandRecycler.allocate( @@ -366,13 +375,16 @@ public: ~SelectionDAG(); /// Prepare this SelectionDAG to process code in the given MachineFunction. - void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE); + void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, + Pass *PassPtr); /// Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. 
void clear(); MachineFunction &getMachineFunction() const { return *MF; } + const Pass *getPass() const { return SDAGISelPass; } + const DataLayout &getDataLayout() const { return MF->getDataLayout(); } const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } @@ -631,6 +643,8 @@ public: SDValue getRegister(unsigned Reg, EVT VT); SDValue getRegisterMask(const uint32_t *RegMask); SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); + SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, + MCSymbol *Label); SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, bool isTarget = false, unsigned char TargetFlags = 0); @@ -782,6 +796,24 @@ public: /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); + /// \brief Create an add instruction with appropriate flags when used for + /// addressing some offset of an object. i.e. if a load is split into multiple + /// components, create an add nuw from the base pointer to the offset. + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) { + EVT VT = Op.getValueType(); + return getObjectPtrOffset(SL, Op, getConstant(Offset, SL, VT)); + } + + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, SDValue Offset) { + EVT VT = Op.getValueType(); + + // The object itself can't wrap around the address space, so it shouldn't be + // possible for the adds of the offsets to the split parts to overflow. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + return getNode(ISD::ADD, SL, VT, Op, Offset, Flags); + } + /// Return a new CALLSEQ_START node, that starts new call frame, in which /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and /// OutSize specifies part of the frame set up prior to the sequence. @@ -956,11 +988,14 @@ public: /// result and takes a list of operands. Opcode may be INTRINSIC_VOID, /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not /// less than FIRST_TARGET_MEMORY_OPCODE. - SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, - ArrayRef<SDValue> Ops, EVT MemVT, - MachinePointerInfo PtrInfo, unsigned Align = 0, - bool Vol = false, bool ReadMem = true, - bool WriteMem = true, unsigned Size = 0); + SDValue getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachinePointerInfo PtrInfo, + unsigned Align = 0, + MachineMemOperand::Flags Flags + = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, + unsigned Size = 0); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, @@ -1166,19 +1201,26 @@ public: const SDNodeFlags Flags = SDNodeFlags()); /// Creates a SDDbgValue node. - SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, const DebugLoc &DL, + SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, + unsigned R, bool IsIndirect, const DebugLoc &DL, unsigned O); - /// Constant - SDDbgValue *getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, - uint64_t Off, const DebugLoc &DL, unsigned O); + /// Creates a constant SDDbgValue node. 
+ SDDbgValue *getConstantDbgValue(DIVariable *Var, DIExpression *Expr, + const Value *C, const DebugLoc &DL, + unsigned O); - /// FrameIndex - SDDbgValue *getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, - uint64_t Off, const DebugLoc &DL, + /// Creates a FrameIndex SDDbgValue node. + SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, + unsigned FI, const DebugLoc &DL, unsigned O); + /// Transfer debug values from one node to another, while optionally + /// generating fragment expressions for split-up values. If \p InvalidateDbg + /// is set, debug values are invalidated after they are transferred. + void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits = 0, + unsigned SizeInBits = 0, bool InvalidateDbg = true); + /// Remove the specified node from the system. If any of its /// operands then becomes dead, remove them as well. Inform UpdateListener /// for each node deleted. @@ -1208,7 +1250,7 @@ public: void ReplaceAllUsesWith(SDNode *From, const SDValue *To); /// Replace any uses of From with To, leaving - /// uses of other values produced by From.Val alone. + /// uses of other values produced by From.getNode() alone. void ReplaceAllUsesOfValueWith(SDValue From, SDValue To); /// Like ReplaceAllUsesOfValueWith, but for multiple values at once. @@ -1259,10 +1301,6 @@ public: return DbgInfo->getSDDbgValues(SD); } -private: - /// Transfer SDDbgValues. Called via ReplaceAllUses{OfValue}?With - void TransferDbgValues(SDValue From, SDValue To); - public: /// Return true if there are any SDDbgValue nodes associated /// with this SelectionDAG. @@ -1279,6 +1317,10 @@ public: return DbgInfo->ByvalParmDbgEnd(); } + /// To be invoked on an SDNode that is slated to be erased. This + /// function mirrors \c llvm::salvageDebugInfo. + void salvageDebugInfo(SDNode &N); + void dump() const; /// Create a stack temporary, suitable for holding the specified value type. @@ -1308,6 +1350,14 @@ public: SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl); + /// See if the specified operand can be simplified with the knowledge that only + /// the bits specified by Mask are used. If so, return the simpler operand, + /// otherwise return a null SDValue. + /// + /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can + /// simplify nodes with multiple uses more aggressively.) + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + /// Return true if the sign bit of Op is known to be zero. /// We use this predicate to simplify operations downstream. bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const; diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 2107e5a313819..18e4c7a83defb 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -1,5 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.h ------- DAG Address Analysis -//---*- C++ -*-===// +//===- SelectionDAGAddressAnalysis.h - DAG Address Analysis -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,16 +6,17 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// #ifndef LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H #define LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include <cstdint> namespace llvm { + +class SelectionDAG; + /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -34,12 +34,11 @@ class BaseIndexOffset { private: SDValue Base; SDValue Index; - int64_t Offset; - bool IsIndexSignExt; + int64_t Offset = 0; + bool IsIndexSignExt = false; public: - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - + BaseIndexOffset() = default; BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt) : Base(Base), Index(Index), Offset(Offset), @@ -59,6 +58,7 @@ public: /// Parses tree in Ptr for base, index, offset addresses. static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); }; -} // namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 591b2f773344d..de6849a1eae12 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <memory> namespace llvm { @@ -130,6 +130,7 @@ public: OPC_CheckOpcode, OPC_SwitchOpcode, OPC_CheckType, + OPC_CheckTypeRes, OPC_SwitchType, OPC_CheckChild0Type, OPC_CheckChild1Type, OPC_CheckChild2Type, OPC_CheckChild3Type, OPC_CheckChild4Type, OPC_CheckChild5Type, @@ -275,6 +276,8 @@ public: return false; } + bool isOrEquivalentToAdd(const SDNode *N) const; + private: // Calls to these functions are generated by tblgen. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 5fb69ae232af8..7de2e766d521a 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -85,10 +85,7 @@ namespace ISD { /// If N is a BUILD_VECTOR node whose elements are all the same constant or /// undefined, return true and return the constant value in \p SplatValue. - /// This sets \p SplatValue to the smallest possible splat unless AllowShrink - /// is set to false. - bool isConstantSplatVector(const SDNode *N, APInt &SplatValue, - bool AllowShrink = true); + bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); /// Return true if the specified node is a BUILD_VECTOR where all of the /// elements are ~0 or undef. @@ -626,13 +623,14 @@ public: /// Test if this node is a strict floating point pseudo-op. 
bool isStrictFPOpcode() { switch (NodeType) { - default: + default: return false; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: case ISD::STRICT_FREM: + case ISD::STRICT_FMA: case ISD::STRICT_FSQRT: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: @@ -1436,6 +1434,9 @@ public: const APInt &getAPIntValue() const { return Value->getValue(); } uint64_t getZExtValue() const { return Value->getZExtValue(); } int64_t getSExtValue() const { return Value->getSExtValue(); } + uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) { + return Value->getLimitedValue(Limit); + } bool isOne() const { return Value->isOne(); } bool isNullValue() const { return Value->isZero(); } @@ -1489,11 +1490,7 @@ public: /// convenient to write "2.0" and the like. Without this function we'd /// have to duplicate its logic everywhere it's called. bool isExactlyValue(double V) const { - bool ignored; - APFloat Tmp(V); - Tmp.convert(Value->getValueAPF().getSemantics(), - APFloat::rmNearestTiesToEven, &ignored); - return isExactlyValue(Tmp); + return Value->getValueAPF().isExactlyValue(V); } bool isExactlyValue(const APFloat& V) const; @@ -1850,19 +1847,20 @@ public: } }; -class EHLabelSDNode : public SDNode { +class LabelSDNode : public SDNode { friend class SelectionDAG; MCSymbol *Label; - EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L) + LabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L) : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {} public: MCSymbol *getLabel() const { return Label; } static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::EH_LABEL; + return N->getOpcode() == ISD::EH_LABEL || + N->getOpcode() == ISD::ANNOTATION_LABEL; } }; @@ -2017,6 +2015,9 @@ public: /// For integers this is the same as doing a TRUNCATE and storing the result. /// For floats, it is the same as doing an FP_ROUND and storing the result. bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + void setTruncatingStore(bool Truncating) { + StoreSDNodeBits.IsTruncating = Truncating; + } const SDValue &getValue() const { return getOperand(1); } const SDValue &getBasePtr() const { return getOperand(2); } @@ -2113,7 +2114,7 @@ class MaskedGatherScatterSDNode : public MemSDNode { public: friend class SelectionDAG; - MaskedGatherScatterSDNode(unsigned NodeTy, unsigned Order, + MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h index ac5092af8def3..45c1df48a5e6f 100644 --- a/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info -*- C++ -*-==// +//==- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -16,21 +16,24 @@ #ifndef LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H #define LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CodeGen.h" +#include <utility> namespace llvm { +class SelectionDAG; + //===----------------------------------------------------------------------===// /// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. 
/// class SelectionDAGTargetInfo { - SelectionDAGTargetInfo(const SelectionDAGTargetInfo &) = delete; - void operator=(const SelectionDAGTargetInfo &) = delete; - public: explicit SelectionDAGTargetInfo() = default; + SelectionDAGTargetInfo(const SelectionDAGTargetInfo &) = delete; + SelectionDAGTargetInfo &operator=(const SelectionDAGTargetInfo &) = delete; virtual ~SelectionDAGTargetInfo(); /// Emit target-specific code that performs a memcpy. @@ -144,6 +147,7 @@ public: MachinePointerInfo SrcPtrInfo) const { return std::make_pair(SDValue(), SDValue()); } + // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather // than FMUL and ADD is delegated to the machine combiner. virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const { @@ -151,6 +155,6 @@ public: } }; -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index a7b16e7a9ed22..3a91e363f9231 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -139,7 +139,7 @@ class raw_ostream; }; /// Construct an invalid index. - SlotIndex() : lie(nullptr, 0) {} + SlotIndex() = default; // Construct a new slot index from the given one, and set the slot. SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) { diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 8263946ed9280..4407114d2741b 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include <algorithm> #include <cassert> @@ -25,7 +26,6 @@ namespace llvm { class AsmPrinter; class MCExpr; class MCStreamer; -class MCSymbol; class raw_ostream; class TargetRegisterInfo; diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h index 483c0ab1eec9e..be6562c85f2ef 100644 --- a/include/llvm/CodeGen/TailDuplicator.h +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -19,11 +19,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <utility> #include <vector> @@ -61,13 +57,14 @@ class TailDuplicator { public: /// Prepare to run on a specific machine function. /// @param MF - Function that will be processed + /// @param PreRegAlloc - true if used before register allocation /// @param MBPI - Branch Probability Info. Used to propagate correct /// probabilities when modifying the CFG. /// @param LayoutMode - When true, don't use the existing layout to make /// decisions. /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate. Zero /// default implies using the command line value TailDupSize. 
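A hedged sketch of a caller updated for the new initMF signature shown just below; MF and MBPI are assumed to exist in the calling pass.

TailDuplicator TailDup;
TailDup.initMF(MF, /*PreRegAlloc=*/true, MBPI,
               /*LayoutMode=*/false); // TailDupSize keeps its default of 0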
- void initMF(MachineFunction &MF, + void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize = 0); diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h new file mode 100644 index 0000000000000..8646a15599cbc --- /dev/null +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -0,0 +1,204 @@ +//===-- llvm/CodeGen/TargetCallingConv.h - Calling Convention ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines types for working with calling-convention information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETCALLINGCONV_H +#define LLVM_CODEGEN_TARGETCALLINGCONV_H + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <climits> +#include <cstdint> + +namespace llvm { +namespace ISD { + + struct ArgFlagsTy { + private: + unsigned IsZExt : 1; ///< Zero extended + unsigned IsSExt : 1; ///< Sign extended + unsigned IsInReg : 1; ///< Passed in register + unsigned IsSRet : 1; ///< Hidden struct-ret ptr + unsigned IsByVal : 1; ///< Struct passed by value + unsigned IsNest : 1; ///< Nested fn static chain + unsigned IsReturned : 1; ///< Always returned + unsigned IsSplit : 1; + unsigned IsInAlloca : 1; ///< Passed with inalloca + unsigned IsSplitEnd : 1; ///< Last part of a split + unsigned IsSwiftSelf : 1; ///< Swift self parameter + unsigned IsSwiftError : 1; ///< Swift error parameter + unsigned IsHva : 1; ///< HVA field for + unsigned IsHvaStart : 1; ///< HVA structure start + unsigned IsSecArgPass : 1; ///< Second argument + unsigned ByValAlign : 4; ///< Log 2 of byval alignment + unsigned OrigAlign : 5; ///< Log 2 of original alignment + unsigned IsInConsecutiveRegsLast : 1; + unsigned IsInConsecutiveRegs : 1; + unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate + + unsigned ByValSize; ///< Byval struct size + + public: + ArgFlagsTy() + : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0), + IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0), + IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0), + IsSecArgPass(0), ByValAlign(0), OrigAlign(0), + IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + IsCopyElisionCandidate(0), ByValSize(0) { + static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big"); + } + + bool isZExt() const { return IsZExt; } + void setZExt() { IsZExt = 1; } + + bool isSExt() const { return IsSExt; } + void setSExt() { IsSExt = 1; } + + bool isInReg() const { return IsInReg; } + void setInReg() { IsInReg = 1; } + + bool isSRet() const { return IsSRet; } + void setSRet() { IsSRet = 1; } + + bool isByVal() const { return IsByVal; } + void setByVal() { IsByVal = 1; } + + bool isInAlloca() const { return IsInAlloca; } + void setInAlloca() { IsInAlloca = 1; } + + bool isSwiftSelf() const { return IsSwiftSelf; } + void setSwiftSelf() { IsSwiftSelf = 1; } + + bool isSwiftError() const { return IsSwiftError; } + void setSwiftError() { IsSwiftError = 1; } + + bool isHva() const { return IsHva; } + void setHva() { IsHva = 1; } + + bool isHvaStart() const { return IsHvaStart; } + void setHvaStart() { IsHvaStart = 
1; } + + bool isSecArgPass() const { return IsSecArgPass; } + void setSecArgPass() { IsSecArgPass = 1; } + + bool isNest() const { return IsNest; } + void setNest() { IsNest = 1; } + + bool isReturned() const { return IsReturned; } + void setReturned() { IsReturned = 1; } + + bool isInConsecutiveRegs() const { return IsInConsecutiveRegs; } + void setInConsecutiveRegs() { IsInConsecutiveRegs = 1; } + + bool isInConsecutiveRegsLast() const { return IsInConsecutiveRegsLast; } + void setInConsecutiveRegsLast() { IsInConsecutiveRegsLast = 1; } + + bool isSplit() const { return IsSplit; } + void setSplit() { IsSplit = 1; } + + bool isSplitEnd() const { return IsSplitEnd; } + void setSplitEnd() { IsSplitEnd = 1; } + + bool isCopyElisionCandidate() const { return IsCopyElisionCandidate; } + void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; } + + unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } + void setByValAlign(unsigned A) { + ByValAlign = Log2_32(A) + 1; + assert(getByValAlign() == A && "bitfield overflow"); + } + + unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; } + void setOrigAlign(unsigned A) { + OrigAlign = Log2_32(A) + 1; + assert(getOrigAlign() == A && "bitfield overflow"); + } + + unsigned getByValSize() const { return ByValSize; } + void setByValSize(unsigned S) { ByValSize = S; } + }; + + /// InputArg - This struct carries flags and type information about a + /// single incoming (formal) argument or incoming (from the perspective + /// of the caller) return value virtual register. + /// + struct InputArg { + ArgFlagsTy Flags; + MVT VT = MVT::Other; + EVT ArgVT; + bool Used = false; + + /// Index original Function's argument. + unsigned OrigArgIndex; + /// Sentinel value for implicit machine-level input arguments. + static const unsigned NoArgIndex = UINT_MAX; + + /// Offset in bytes of current input value relative to the beginning of + /// original argument. E.g. if argument was splitted into four 32 bit + /// registers, we got 4 InputArgs with PartOffsets 0, 4, 8 and 12. + unsigned PartOffset; + + InputArg() = default; + InputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool used, + unsigned origIdx, unsigned partOffs) + : Flags(flags), Used(used), OrigArgIndex(origIdx), PartOffset(partOffs) { + VT = vt.getSimpleVT(); + ArgVT = argvt; + } + + bool isOrigArg() const { + return OrigArgIndex != NoArgIndex; + } + + unsigned getOrigArgIndex() const { + assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument"); + return OrigArgIndex; + } + }; + + /// OutputArg - This struct carries flags and a value for a + /// single outgoing (actual) argument or outgoing (from the perspective + /// of the caller) return value virtual register. + /// + struct OutputArg { + ArgFlagsTy Flags; + MVT VT; + EVT ArgVT; + + /// IsFixed - Is this a "fixed" value, ie not passed through a vararg "...". + bool IsFixed = false; + + /// Index original Function's argument. + unsigned OrigArgIndex; + + /// Offset in bytes of current output value relative to the beginning of + /// original argument. E.g. if argument was splitted into four 32 bit + /// registers, we got 4 OutputArgs with PartOffsets 0, 4, 8 and 12. 
+ unsigned PartOffset; + + OutputArg() = default; + OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed, + unsigned origIdx, unsigned partOffs) + : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), + PartOffset(partOffs) { + VT = vt.getSimpleVT(); + ArgVT = argvt; + } + }; + +} // end namespace ISD +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETCALLINGCONV_H diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h new file mode 100644 index 0000000000000..61f1cf07bcf2c --- /dev/null +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -0,0 +1,348 @@ +//===-- llvm/CodeGen/TargetFrameLowering.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to describe the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETFRAMELOWERING_H +#define LLVM_CODEGEN_TARGETFRAMELOWERING_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include <utility> +#include <vector> + +namespace llvm { + class BitVector; + class CalleeSavedInfo; + class MachineFunction; + class RegScavenger; + +/// Information about stack frame layout on the target. It holds the direction +/// of stack growth, the known stack alignment on entry to each function, and +/// the offset to the locals area. +/// +/// The offset to the local area is the offset from the stack pointer on +/// function entry to the first location where function data (local variables, +/// spill locations) can be stored. +class TargetFrameLowering { +public: + enum StackDirection { + StackGrowsUp, // Adding to the stack increases the stack address + StackGrowsDown // Adding to the stack decreases the stack address + }; + + // Maps a callee saved register to a stack slot with a fixed offset. + struct SpillSlot { + unsigned Reg; + int Offset; // Offset relative to stack pointer on function entry. + }; +private: + StackDirection StackDir; + unsigned StackAlignment; + unsigned TransientStackAlignment; + int LocalAreaOffset; + bool StackRealignable; +public: + TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1, bool StackReal = true) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO), StackRealignable(StackReal) {} + + virtual ~TargetFrameLowering(); + + // These methods return information that describes the abstract stack layout + // of the target machine. + + /// getStackGrowthDirection - Return the direction the stack grows + /// + StackDirection getStackGrowthDirection() const { return StackDir; } + + /// getStackAlignment - This method returns the number of bytes to which the + /// stack pointer must be aligned on entry to a function. Typically, this + /// is the largest alignment for any data object in the target. + /// + unsigned getStackAlignment() const { return StackAlignment; } + + /// alignSPAdjust - This method aligns the stack adjustment to the correct + /// alignment. 
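A small worked example for the alignSPAdjust method defined just below, assuming a hypothetical frame lowering TFL constructed with StackAlignment = 16.

assert(TFL.alignSPAdjust(20)  == 32);   // positive adjustments round up
assert(TFL.alignSPAdjust(-20) == -32);  // negative adjustments round away from zero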
+ /// + int alignSPAdjust(int SPAdj) const { + if (SPAdj < 0) { + SPAdj = -alignTo(-SPAdj, StackAlignment); + } else { + SPAdj = alignTo(SPAdj, StackAlignment); + } + return SPAdj; + } + + /// getTransientStackAlignment - This method returns the number of bytes to + /// which the stack pointer must be aligned at all times, even between + /// calls. + /// + unsigned getTransientStackAlignment() const { + return TransientStackAlignment; + } + + /// isStackRealignable - This method returns whether the stack can be + /// realigned. + bool isStackRealignable() const { + return StackRealignable; + } + + /// Return the skew that has to be applied to stack alignment under + /// certain conditions (e.g. stack was adjusted before function \p MF + /// was called). + virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + + /// getOffsetOfLocalArea - This method returns the offset of the local area + /// from the stack pointer on entrance to a function. + /// + int getOffsetOfLocalArea() const { return LocalAreaOffset; } + + /// isFPCloseToIncomingSP - Return true if the frame pointer is close to + /// the incoming stack pointer, false if it is close to the post-prologue + /// stack pointer. + virtual bool isFPCloseToIncomingSP() const { return true; } + + /// assignCalleeSavedSpillSlots - Allows target to override spill slot + /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should + /// assign frame slots to all CSI entries and return true. If this method + /// returns false, spill slots will be assigned using generic implementation. + /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of + /// CSI. + virtual bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + return false; + } + + /// getCalleeSavedSpillSlots - This method returns a pointer to an array of + /// pairs, that contains an entry for each callee saved register that must be + /// spilled to a particular stack location if it is spilled. + /// + /// Each entry in this array contains a <register,offset> pair, indicating the + /// fixed offset from the incoming stack pointer that each register should be + /// spilled at. If a register is not listed here, the code generator is + /// allowed to spill it anywhere it chooses. + /// + virtual const SpillSlot * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = 0; + return nullptr; + } + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + virtual bool targetHandlesStackFrameRounding() const { + return false; + } + + /// Returns true if the target will correctly handle shrink wrapping. + virtual bool enableShrinkWrapping(const MachineFunction &MF) const { + return false; + } + + /// Returns true if the stack slot holes in the fixed and callee-save stack + /// area should be used when allocating other stack locations to reduce stack + /// size. + virtual bool enableStackSlotScavenging(const MachineFunction &MF) const { + return false; + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
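A minimal sketch of a target subclass, assuming a hypothetical target with a 16-byte-aligned, downward-growing stack; only the pure-virtual hooks declared just below are overridden, and MyTargetFrameLowering is an invented name.

class MyTargetFrameLowering : public TargetFrameLowering {
public:
  MyTargetFrameLowering()
      : TargetFrameLowering(StackGrowsDown, /*StackAl=*/16, /*LAO=*/0) {}
  void emitPrologue(MachineFunction &MF,
                    MachineBasicBlock &MBB) const override {
    // Prologue emission elided in this sketch.
  }
  void emitEpilogue(MachineFunction &MF,
                    MachineBasicBlock &MBB) const override {
    // Epilogue emission elided in this sketch.
  }
  bool hasFP(const MachineFunction &MF) const override { return false; }
};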
+ virtual void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const = 0; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const = 0; + + /// Replace a StackProbe stub (if any) with the actual probe code inline + virtual void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// Adjust the prologue to have the function use segmented stacks. This works + /// by adding a check even before the "normal" function prologue. + virtual void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in + /// the assembly prologue to explicitly handle the stack. + virtual void adjustForHiPEPrologue(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee + /// saved registers and returns true if it isn't possible / profitable to do + /// so by issuing a series of store instructions via + /// storeRegToStackSlot(). Returns false otherwise. + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee + /// saved registers and returns true if it isn't possible / profitable to do + /// so by issuing a series of load instructions via loadRegToStackSlot(). + /// If it returns true, and any of the registers in CSI is not restored, + /// it sets the corresponding Restored flag in CSI to false. + /// Returns false otherwise. + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// Return true if the target needs to disable frame pointer elimination. + virtual bool noFramePointerElim(const MachineFunction &MF) const; + + /// hasFP - Return true if the specified function should have a dedicated + /// frame pointer register. For most targets this is true only if the function + /// has variable sized allocas or if frame pointer elimination is disabled. + virtual bool hasFP(const MachineFunction &MF) const = 0; + + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is + /// not required, we reserve argument space for call sites in the function + /// immediately on entry to the current function. This eliminates the need for + /// add/sub sp brackets around call sites. Returns true if the call frame is + /// included as part of the stack frame. + virtual bool hasReservedCallFrame(const MachineFunction &MF) const { + return !hasFP(MF); + } + + /// canSimplifyCallFramePseudos - When possible, it's best to simplify the + /// call frame pseudo ops before doing frame index elimination. This is + /// possible only when frame index references between the pseudos won't + /// need adjusting for the call frame adjustments. Normally, that's true + /// if the function has a reserved call frame or a frame pointer. Some + /// targets (Thumb2, for example) may have more complicated criteria, + /// however, and can override this behavior. + virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const { + return hasReservedCallFrame(MF) || hasFP(MF); + } + + // needsFrameIndexResolution - Do we need to perform FI resolution for + // this function. 
Normally, this is required only when the function + // has any stack objects. However, targets may want to override this. + virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; + + /// getFrameIndexReference - This method should return the base register + /// and offset used to reference a frame index location. The offset is + /// returned directly, and the base register is returned via FrameReg. + virtual int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const; + + /// Same as \c getFrameIndexReference, except that the stack pointer (as + /// opposed to the frame pointer) will be the preferred value for \p + /// FrameReg. This is generally used for emitting statepoint or EH tables that + /// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned + /// offset is only guaranteed to be valid with respect to the value of SP at + /// the end of the prologue. + virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + unsigned &FrameReg, + bool IgnoreSPUpdates) const { + // Always safe to dispatch to getFrameIndexReference. + return getFrameIndexReference(MF, FI, FrameReg); + } + + /// This method determines which of the registers reported by + /// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved. + /// The default implementation checks populates the \p SavedRegs bitset with + /// all registers which are modified in the function, targets may override + /// this function to save additional registers. + /// This method also sets up the register scavenger ensuring there is a free + /// register or a frameindex available. + virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const; + + /// processFunctionBeforeFrameFinalized - This method is called immediately + /// before the specified function's frame layout (MF.getFrameInfo()) is + /// finalized. Once the frame is finalized, MO_FrameIndex operands are + /// replaced with direct constants. This method is optional. + /// + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const { + } + + virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const { + report_fatal_error("WinEH not implemented for this target"); + } + + /// This method is called during prolog/epilog code insertion to eliminate + /// call frame setup and destroy pseudo instructions (but only if the Target + /// is using them). It is responsible for eliminating these instructions, + /// replacing them with concrete instructions. This method need only be + /// implemented if using call frame setup/destroy pseudo instructions. + /// Returns an iterator pointing to the instruction after the replaced one. + virtual MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + llvm_unreachable("Call Frame Pseudo Instructions do not exist on this " + "target!"); + } + + + /// Order the symbols in the local stack frame. + /// The list of objects that we want to order is in \p objectsToAllocate as + /// indices into the MachineFrameInfo. The array can be reordered in any way + /// upon return. The contents of the array, however, may not be modified (i.e. + /// only their order may be changed). + /// By default, just maintain the original order. 
+ virtual void + orderFrameObjects(const MachineFunction &MF, + SmallVectorImpl<int> &objectsToAllocate) const { + } + + /// Check whether or not the given \p MBB can be used as a prologue + /// for the target. + /// The prologue will be inserted first in this basic block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + /// As soon as the target enable shrink-wrapping without overriding + /// this method, we assume that each basic block is a valid + /// prologue. + virtual bool canUseAsPrologue(const MachineBasicBlock &MBB) const { + return true; + } + + /// Check whether or not the given \p MBB can be used as a epilogue + /// for the target. + /// The epilogue will be inserted before the first terminator of that block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + /// As soon as the target enable shrink-wrapping without overriding + /// this method, we assume that each basic block is a valid + /// epilogue. + virtual bool canUseAsEpilogue(const MachineBasicBlock &MBB) const { + return true; + } + + /// Check if given function is safe for not having callee saved registers. + /// This is used when interprocedural register allocation is enabled. + static bool isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. + for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h new file mode 100644 index 0000000000000..38a1b33aecad8 --- /dev/null +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -0,0 +1,1691 @@ +//===- llvm/CodeGen/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the target machine instruction set to the code generator. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETINSTRINFO_H +#define LLVM_TARGET_TARGETINSTRINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/None.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +namespace llvm { + +class DFAPacketizer; +class InstrItineraryData; +class LiveIntervals; +class LiveVariables; +class MachineMemOperand; +class MachineRegisterInfo; +class MCAsmInfo; +class MCInst; +struct MCSchedModel; +class Module; +class ScheduleDAG; +class ScheduleHazardRecognizer; +class SDNode; +class SelectionDAG; +class RegScavenger; +class TargetRegisterClass; +class TargetRegisterInfo; +class TargetSchedModel; +class TargetSubtargetInfo; + +template <class T> class SmallVectorImpl; + +//--------------------------------------------------------------------------- +/// +/// TargetInstrInfo - Interface to description of machine instruction set +/// +class TargetInstrInfo : public MCInstrInfo { +public: + TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, + unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) + : CallFrameSetupOpcode(CFSetupOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode), + ReturnOpcode(ReturnOpcode) {} + TargetInstrInfo(const TargetInstrInfo &) = delete; + TargetInstrInfo &operator=(const TargetInstrInfo &) = delete; + virtual ~TargetInstrInfo(); + + static bool isGenericOpcode(unsigned Opc) { + return Opc <= TargetOpcode::GENERIC_OP_END; + } + + /// Given a machine instruction descriptor, returns the register + /// class constraint for OpNum, or NULL. + const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const; + + /// Return true if the instruction is trivially rematerializable, meaning it + /// has no side effects and requires no operands that aren't always available. + /// This means the only allowed uses are constants and unallocatable physical + /// registers so that the instructions result is independent of the place + /// in the function. + bool isTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA = nullptr) const { + return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || + (MI.getDesc().isRematerializable() && + (isReallyTriviallyReMaterializable(MI, AA) || + isReallyTriviallyReMaterializableGeneric(MI, AA))); + } + +protected: + /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is + /// set, this hook lets the target specify whether the instruction is actually + /// trivially rematerializable, taking into consideration its operands. This + /// predicate must return false if the instruction has any side effects other + /// than producing a value, or if it requres any address registers that are + /// not always available. + /// Requirements must be check as stated in isTriviallyReMaterializable() . 
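A hedged sketch of how a backend might override the hook declared just below; MyTargetInstrInfo and MYTGT::LOADCONST are hypothetical names used only for illustration.

bool MyTargetInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI, AliasAnalysis *AA) const {
  // A constant-materializing pseudo with no side effects and no register
  // inputs is safe to recompute at any use.
  return MI.getOpcode() == MYTGT::LOADCONST;
}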
+ virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const { + return false; + } + + /// This method commutes the operands of the given machine instruction MI. + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. + /// + /// If a target has any instructions that are commutable but require + /// converting to different instructions or making non-trivial changes + /// to commute them, this method can be overloaded to do that. + /// The default implementation simply swaps the commutable operands. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + virtual MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const; + + /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable + /// operand indices to (ResultIdx1, ResultIdx2). + /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be + /// predefined to some indices or be undefined (designated by the special + /// value 'CommuteAnyOperandIndex'). + /// The predefined result indices cannot be re-defined. + /// The function returns true iff after the result pair redefinition + /// the fixed result pair is equal to or equivalent to the source pair of + /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that + /// the pairs (x,y) and (y,x) are equivalent. + static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2); + +private: + /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is + /// set and the target hook isReallyTriviallyReMaterializable returns false, + /// this function does target-independent tests to determine if the + /// instruction is really trivially rematerializable. + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, + AliasAnalysis *AA) const; + +public: + /// These methods return the opcode of the frame setup/destroy instructions + /// if they exist (-1 otherwise). Some targets use pseudo instructions in + /// order to abstract away the difference between operating with a frame + /// pointer and operating without, through the use of these two instructions. + /// + unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } + unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + + /// Returns true if the argument is a frame pseudo instruction. + bool isFrameInstr(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode() || + I.getOpcode() == getCallFrameDestroyOpcode(); + } + + /// Returns true if the argument is a frame setup pseudo instruction. + bool isFrameSetup(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode(); + } + + /// Returns size of the frame associated with the given frame instruction. + /// For frame setup instruction this is frame that is set up space set up + /// after the instruction. For frame destroy instruction this is the frame + /// freed by the caller. + /// Note, in some cases a call frame (or a part of it) may be prepared prior + /// to the frame setup instruction. 
It occurs in the calls that involve + /// inalloca arguments. This function reports only the size of the frame part + /// that is set up between the frame setup and destroy pseudo instructions. + int64_t getFrameSize(const MachineInstr &I) const { + assert(isFrameInstr(I) && "Not a frame instruction"); + assert(I.getOperand(0).getImm() >= 0); + return I.getOperand(0).getImm(); + } + + /// Returns the total frame size, which is made up of the space set up inside + /// the pair of frame start-stop instructions and the space that is set up + /// prior to the pair. + int64_t getFrameTotalSize(const MachineInstr &I) const { + if (isFrameSetup(I)) { + assert(I.getOperand(1).getImm() >= 0 && + "Frame size must not be negative"); + return getFrameSize(I) + I.getOperand(1).getImm(); + } + return getFrameSize(I); + } + + unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } + unsigned getReturnOpcode() const { return ReturnOpcode; } + + /// Returns the actual stack pointer adjustment made by an instruction + /// as part of a call sequence. By default, only call frame setup/destroy + /// instructions adjust the stack, but targets may want to override this + /// to enable more fine-grained adjustment, or adjust by a different value. + virtual int getSPAdjust(const MachineInstr &MI) const; + + /// Return true if the instruction is a "coalescable" extension instruction. + /// That is, it's like a copy where it's legal for the source to overlap the + /// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's + /// expected the pre-extension value is available as a subreg of the result + /// register. This also returns the sub-register index in SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const { + return false; + } + + /// If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// Check for post-frame ptr elimination stack locations as well. + /// This uses a heuristic so it isn't reliable for correctness. + virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// If the specified machine instruction has a load from a stack slot, + /// return true along with the FrameIndex of the loaded stack slot and the + /// machine mem operand containing the reference. + /// If not, return false. Unlike isLoadFromStackSlot, this returns true for + /// any instructions that loads from the stack. This is just a hint, as some + /// cases may be missed. + virtual bool hasLoadFromStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + /// If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
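A worked example for the frame-size helpers defined above (getFrameSize / getFrameTotalSize), using hypothetical operand values for a frame-setup pseudo.

// Hypothetical frame-setup pseudo: ADJCALLSTACKDOWN 32, 8
// getFrameSize(MI)      == 32  (space set up between the setup/destroy pair)
// getFrameTotalSize(MI) == 40  (adds the 8 bytes prepared before the pseudo)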
+ virtual unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// Check for post-frame ptr elimination stack locations as well. + /// This uses a heuristic, so it isn't reliable for correctness. + virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// If the specified machine instruction has a store to a stack slot, + /// return true along with the FrameIndex of the loaded stack slot and the + /// machine mem operand containing the reference. + /// If not, return false. Unlike isStoreToStackSlot, + /// this returns true for any instructions that stores to the + /// stack. This is just a hint, as some cases may be missed. + virtual bool hasStoreToStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + /// Return true if the specified machine instruction + /// is a copy of one stack slot to another and has no other effect. + /// Provide the identity of the two frame indices. + virtual bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex, + int &SrcFrameIndex) const { + return false; + } + + /// Compute the size in bytes and offset within a stack slot of a spilled + /// register or subregister. + /// + /// \param [out] Size in bytes of the spilled value. + /// \param [out] Offset in bytes within the stack slot. + /// \returns true if both Size and Offset are successfully computed. + /// + /// Not all subregisters have computable spill slots. For example, + /// subregisters registers may not be byte-sized, and a pair of discontiguous + /// subregisters has no single offset. + /// + /// Targets with nontrivial bigendian implementations may need to override + /// this, particularly to support spilled vector registers. + virtual bool getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, + unsigned &Size, unsigned &Offset, + const MachineFunction &MF) const; + + /// Returns the size in bytes of the specified MachineInstr, or ~0U + /// when this function is not implemented by a target. + virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const { + return ~0U; + } + + /// Return true if the instruction is as cheap as a move instruction. + /// + /// Targets for different archs need to override this, and different + /// micro-architectures can also be finely tuned inside. + virtual bool isAsCheapAsAMove(const MachineInstr &MI) const { + return MI.isAsCheapAsAMove(); + } + + /// Return true if the instruction should be sunk by MachineSink. + /// + /// MachineSink determines on its own whether the instruction is safe to sink; + /// this gives the target a hook to override the default behavior with regards + /// to which instructions should be sunk. + virtual bool shouldSink(const MachineInstr &MI) const { return true; } + + /// Re-issue the specified 'original' instruction at the + /// specific location targeting a new destination register. + /// The register in Orig->getOperand(0).getReg() will be substituted by + /// DestReg:SubIdx. Any existing subreg index is preserved or composed with + /// SubIdx. + virtual void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned DestReg, + unsigned SubIdx, const MachineInstr &Orig, + const TargetRegisterInfo &TRI) const; + + /// \brief Clones instruction or the whole instruction bundle \p Orig and + /// insert into \p MBB before \p InsertBefore. The target may update operands + /// that are required to be unique. 
+ /// + /// \p Orig must not return true for MachineInstr::isNotDuplicable(). + virtual MachineInstr &duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const; + + /// This method must be implemented by targets that + /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target + /// may be able to convert a two-address instruction into one or more true + /// three-address instructions on demand. This allows the X86 target (for + /// example) to convert ADD and SHL instructions into LEA instructions if they + /// would require register copies due to two-addressness. + /// + /// This method returns a null pointer if the transformation cannot be + /// performed, otherwise it returns the last new instruction. + /// + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineInstr &MI, + LiveVariables *LV) const { + return nullptr; + } + + // This constant can be used as an input value of operand index passed to + // the method findCommutedOpIndices() to tell the method that the + // corresponding operand index is not pre-defined and that the method + // can pick any commutable operand. + static const unsigned CommuteAnyOperandIndex = ~0U; + + /// This method commutes the operands of the given machine instruction MI. + /// + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. OpIdx1 and OpIdx2 arguments may be set to a special value + /// 'CommuteAnyOperandIndex', which means that the method is free to choose + /// any arbitrarily chosen commutable operand. If both arguments are set to + /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable + /// operands; then commutes them if such operands could be found. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction or + /// for non-commuable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr * + commuteInstruction(MachineInstr &MI, bool NewMI = false, + unsigned OpIdx1 = CommuteAnyOperandIndex, + unsigned OpIdx2 = CommuteAnyOperandIndex) const; + + /// Returns true iff the routine could find two commutable operands in the + /// given machine instruction. + /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. + /// If any of the INPUT values is set to the special value + /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable + /// operand, then returns its index in the corresponding argument. + /// If both of INPUT values are set to 'CommuteAnyOperandIndex' then method + /// looks for 2 commutable operands. + /// If INPUT values refer to some operands of MI, then the method simply + /// returns true if the corresponding operands are commutable and returns + /// false otherwise. + /// + /// For example, calling this method this way: + /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex; + /// findCommutedOpIndices(MI, Op1, Op2); + /// can be interpreted as a query asking to find an operand that would be + /// commutable with the operand#1. + virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const; + + /// A pair composed of a register and a sub-register index. + /// Used to give some type checking when modeling Reg:SubReg. 
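A hedged usage sketch for the commutation hooks above; TII and MI are assumed to exist in the caller.

unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex;
unsigned Idx2 = TargetInstrInfo::CommuteAnyOperandIndex;
// Let the target pick any pair of commutable operands, then commute in place.
if (TII->findCommutedOpIndices(MI, Idx1, Idx2))
  TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2);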
+ struct RegSubRegPair { + unsigned Reg; + unsigned SubReg; + + RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0) + : Reg(Reg), SubReg(SubReg) {} + }; + + /// A pair composed of a pair of a register and a sub-register index, + /// and another sub-register index. + /// Used to give some type checking when modeling Reg:SubReg1, SubReg2. + struct RegSubRegPairAndIdx : RegSubRegPair { + unsigned SubIdx; + + RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0, + unsigned SubIdx = 0) + : RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {} + }; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as <Reg:SubReg, SubIdx>. + /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce + /// two elements: + /// - %1:sub1, sub0 + /// - %2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequence() or MI.isRegSequenceLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isRegSequenceLike(). In other words, one has to override + /// getRegSequenceLikeInputs for target specific instructions. + bool + getRegSequenceInputs(const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: + /// - %1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isExtractSubregLike(). In other words, one has to override + /// getExtractSubregLikeInputs for target specific instructions. + bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: + /// - BaseReg: %0:sub0 + /// - InsertedReg: %1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isInsertSubregLike(). In other words, one has to override + /// getInsertSubregLikeInputs for target specific instructions. + bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const; + + /// Return true if two machine instructions would produce identical values. + /// By default, this is only true when the two instructions + /// are deemed identical except for defs. If this function is called when the + /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for + /// aggressive checks. 
+ virtual bool produceSameValue(const MachineInstr &MI0, + const MachineInstr &MI1, + const MachineRegisterInfo *MRI = nullptr) const; + + /// \returns true if a branch from an instruction with opcode \p BranchOpc + /// bytes is capable of jumping to a position \p BrOffset bytes away. + virtual bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const { + llvm_unreachable("target did not implement"); + } + + /// \returns The block that branch instruction \p MI jumps to. + virtual MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const { + llvm_unreachable("target did not implement"); + } + + /// Insert an unconditional indirect branch at the end of \p MBB to \p + /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to + /// the offset of the position to insert the new branch. + /// + /// \returns The number of bytes added to the block. + virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const { + llvm_unreachable("target did not implement"); + } + + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ) + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that removeBranch and insertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. + virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const { + return true; + } + + /// Represents a predicate at the MachineFunction level. 
The control flow a + /// MachineBranchPredicate represents is: + /// + /// Reg = LHS `Predicate` RHS == ConditionDef + /// if Reg then goto TrueDest else goto FalseDest + /// + struct MachineBranchPredicate { + enum ComparePredicate { + PRED_EQ, // True if two values are equal + PRED_NE, // True if two values are not equal + PRED_INVALID // Sentinel value + }; + + ComparePredicate Predicate = PRED_INVALID; + MachineOperand LHS = MachineOperand::CreateImm(0); + MachineOperand RHS = MachineOperand::CreateImm(0); + MachineBasicBlock *TrueDest = nullptr; + MachineBasicBlock *FalseDest = nullptr; + MachineInstr *ConditionDef = nullptr; + + /// SingleUseCondition is true if ConditionDef is dead except for the + /// branch(es) at the end of the basic block. + /// + bool SingleUseCondition = false; + + explicit MachineBranchPredicate() = default; + }; + + /// Analyze the branching code at the end of MBB and parse it into the + /// MachineBranchPredicate structure if possible. Returns false on success + /// and true on failure. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + virtual bool analyzeBranchPredicate(MachineBasicBlock &MBB, + MachineBranchPredicate &MBP, + bool AllowModify = false) const { + return true; + } + + /// Remove the branching code at the end of the specific MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. + /// If \p BytesRemoved is non-null, report the change in code size from the + /// removed instructions. + virtual unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!"); + } + + /// Insert branch code into the end of the specified MachineBasicBlock. The + /// operands to this method are the same as those returned by AnalyzeBranch. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions inserted. If \p BytesAdded is non-null, + /// report the change in code size from the added instructions. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled. + /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. + virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!"); + } + + unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, + MachineBasicBlock *DestBB, + const DebugLoc &DL, + int *BytesAdded = nullptr) const { + return insertBranch(MBB, DestBB, nullptr, ArrayRef<MachineOperand>(), DL, + BytesAdded); + } + + /// Analyze the loop code, return true if it cannot be understoo. Upon + /// success, this function returns false and returns information about the + /// induction variable and compare instruction used at the end. 
+  virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                           MachineInstr *&CmpInst) const {
+    return true;
+  }
+
+  /// Generate code to reduce the loop iteration by one and check if the loop is
+  /// finished. Return the value/register of the new loop count. We need
+  /// this function when peeling off one or more iterations of a loop. This
+  /// function assumes the nth iteration is peeled first.
+  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                                   MachineInstr &Cmp,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   SmallVectorImpl<MachineInstr *> &PrevInsts,
+                                   unsigned Iter, unsigned MaxIter) const {
+    llvm_unreachable("Target didn't implement ReduceLoopCount");
+  }
+
+  /// Delete the instruction OldInst and everything after it, replacing it with
+  /// an unconditional branch to NewDest. This is used by the tail merging pass.
+  virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                       MachineBasicBlock *NewDest) const;
+
+  /// Return true if it's legal to split the given basic
+  /// block at the specified instruction (i.e. instruction would be the start
+  /// of a new basic block).
+  virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI) const {
+    return true;
+  }
+
+  /// Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
+  /// of the specified basic block, where the probability of the instructions
+  /// being executed is given by Probability, and Confidence is a measure
+  /// of our confidence that it will be properly predicted.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                                   unsigned ExtraPredCycles,
+                                   BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Second variant of isProfitableToIfCvt. This one
+  /// checks for the case where two basic blocks from true and false path
+  /// of an if-then-else (diamond) are predicated on mutually exclusive
+  /// predicates, where the probability of the true path being taken is given
+  /// by Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles,
+                                   unsigned ExtraTCycles,
+                                   MachineBasicBlock &FMBB, unsigned NumFCycles,
+                                   unsigned ExtraFCycles,
+                                   BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Return true if it's profitable for if-converter to duplicate instructions
+  /// of specified accumulated instruction latencies in the specified MBB to
+  /// enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Return true if it's profitable to unpredicate
+  /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
+  /// exclusive predicates.
+  /// e.g.
+  ///   subeq  r0, r1, #1
+  ///   addne  r0, r1, #1
+  /// =>
+  ///   sub    r0, r1, #1
+  ///   addne  r0, r1, #1
+  ///
+  /// This may be profitable if conditional instructions are always executed.
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + /// Return true if it is possible to insert a select + /// instruction that chooses between TrueReg and FalseReg based on the + /// condition code in Cond. + /// + /// When successful, also return the latency in cycles from TrueReg, + /// FalseReg, and Cond to the destination register. In most cases, a select + /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1 + /// + /// Some x86 implementations have 2-cycle cmov instructions. + /// + /// @param MBB Block where select instruction would be inserted. + /// @param Cond Condition returned by AnalyzeBranch. + /// @param TrueReg Virtual register to select when Cond is true. + /// @param FalseReg Virtual register to select when Cond is false. + /// @param CondCycles Latency from Cond+Branch to select output. + /// @param TrueCycles Latency from TrueReg to select output. + /// @param FalseCycles Latency from FalseReg to select output. + virtual bool canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { + return false; + } + + /// Insert a select instruction into MBB before I that will copy TrueReg to + /// DstReg when Cond is true, and FalseReg to DstReg when Cond is false. + /// + /// This function can only be called after canInsertSelect() returned true. + /// The condition in Cond comes from AnalyzeBranch, and it can be assumed + /// that the same flags or registers required by Cond are available at the + /// insertion point. + /// + /// @param MBB Block where select instruction should be inserted. + /// @param I Insertion point. + /// @param DL Source location for debugging. + /// @param DstReg Virtual register to be defined by select instruction. + /// @param Cond Condition as computed by AnalyzeBranch. + /// @param TrueReg Virtual register to copy when Cond is true. + /// @param FalseReg Virtual register to copy when Cons is false. + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!"); + } + + /// Analyze the given select instruction, returning true if + /// it cannot be understood. It is assumed that MI->isSelect() is true. + /// + /// When successful, return the controlling condition and the operands that + /// determine the true and false result values. + /// + /// Result = SELECT Cond, TrueOp, FalseOp + /// + /// Some targets can optimize select instructions, for example by predicating + /// the instruction defining one of the operands. Such targets should set + /// Optimizable. + /// + /// @param MI Select instruction to analyze. + /// @param Cond Condition controlling the select. + /// @param TrueOp Operand number of the value selected when Cond is true. + /// @param FalseOp Operand number of the value selected when Cond is false. + /// @param Optimizable Returned as true if MI is optimizable. + /// @returns False on success. 
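+  ///
+  /// For a target select laid out as, say (the operand layout here is purely
+  /// illustrative):
+  /// \code
+  ///   %dst = CSELrr %tval, %fval, %cc
+  /// \endcode
+  /// an implementation would typically set TrueOp = 1 and FalseOp = 2, copy
+  /// the condition operand(s) into Cond, and set Optimizable when the target
+  /// can fold the instruction defining %tval or %fval into the select.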
+ virtual bool analyzeSelect(const MachineInstr &MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert(MI.getDesc().isSelect() && "MI must be a select instruction"); + return true; + } + + /// Given a select instruction that was understood by + /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by + /// merging it with one of its operands. Returns NULL on failure. + /// + /// When successful, returns the new select instruction. The client is + /// responsible for deleting MI. + /// + /// If both sides of the select can be optimized, PreferFalse is used to pick + /// a side. + /// + /// @param MI Optimizable select instruction. + /// @param NewMIs Set that record all MIs in the basic block up to \p + /// MI. Has to be updated with any newly created MI or deleted ones. + /// @param PreferFalse Try to optimize FalseOp instead of TrueOp. + /// @returns Optimized instruction or NULL. + virtual MachineInstr *optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &NewMIs, + bool PreferFalse = false) const { + // This function must be implemented if Optimizable is ever set. + llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!"); + } + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); + } + + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::storeRegToStackSlot!"); + } + + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction. + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::loadRegFromStackSlot!"); + } + + /// This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. 
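+  ///
+  /// As a sketch only, a target with a hypothetical RETPOP pseudo (restore the
+  /// stack, then return) might expand it like this; the Foo opcodes below are
+  /// assumptions, not real instructions:
+  /// \code
+  ///   bool FooInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  ///     if (MI.getOpcode() != Foo::RETPOP)
+  ///       return false;
+  ///     MachineBasicBlock &MBB = *MI.getParent();
+  ///     const DebugLoc &DL = MI.getDebugLoc();
+  ///     BuildMI(MBB, MI, DL, get(Foo::ADDSPri)).addImm(MI.getOperand(0).getImm());
+  ///     BuildMI(MBB, MI, DL, get(Foo::RET));
+  ///     MI.eraseFromParent();
+  ///     return true;
+  ///   }
+  /// \endcode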
+ virtual bool expandPostRAPseudo(MachineInstr &MI) const { return false; } + + /// Check whether the target can fold a load that feeds a subreg operand + /// (or a subreg operand that feeds a store). + /// For example, X86 may want to return true if it can fold + /// movl (%esp), %eax + /// subb, %al, ... + /// Into: + /// subb (%esp), ... + /// + /// Ideally, we'd like the target implementation of foldMemoryOperand() to + /// reject subregs - but since this behavior used to be enforced in the + /// target-independent code, moving this responsibility to the targets + /// has the potential of causing nasty silent breakage in out-of-tree targets. + virtual bool isSubregFoldable() const { return false; } + + /// Attempt to fold a load or store of the specified stack + /// slot into the specified machine instruction for the specified operand(s). + /// If this is possible, a new instruction is returned with the specified + /// operand folded, otherwise NULL is returned. + /// The new instruction is inserted before MI, and the client is responsible + /// for removing the old instruction. + MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, + int FrameIndex, + LiveIntervals *LIS = nullptr) const; + + /// Same as the previous version except it allows folding of any load and + /// store from / to any address, not just from a specific stack slot. + MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineInstr &LoadMI, + LiveIntervals *LIS = nullptr) const; + + /// Return true when there is potentially a faster code sequence + /// for an instruction chain ending in \p Root. All potential patterns are + /// returned in the \p Pattern vector. Pattern should be sorted in priority + /// order since the pattern evaluator stops checking as soon as it finds a + /// faster sequence. + /// \param Root - Instruction that could be combined with one of its operands + /// \param Patterns - Vector of possible combination patterns + virtual bool getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const; + + /// Return true when a code sequence can improve throughput. It + /// should be called only for instructions in loops. + /// \param Pattern - combiner pattern + virtual bool isThroughputPattern(MachineCombinerPattern Pattern) const; + + /// Return true if the input \P Inst is part of a chain of dependent ops + /// that are suitable for reassociation, otherwise return false. + /// If the instruction's operands must be commuted to have a previous + /// instruction of the same type define the first source operand, \P Commuted + /// will be set to true. + bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; + + /// Return true when \P Inst is both associative and commutative. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { + return false; + } + + /// Return true when \P Inst has reassociable operands in the same \P MBB. + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const; + + /// Return true when \P Inst has reassociable sibling. + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + + /// When getMachineCombinerPatterns() finds patterns, this function generates + /// the instructions that could replace the original code sequence. The client + /// has to decide whether the actual replacement is beneficial or not. 
+ /// \param Root - Instruction that could be combined with one of its operands + /// \param Pattern - Combination pattern for Root + /// \param InsInstrs - Vector of new instructions that implement P + /// \param DelInstrs - Old instructions, including Root, that could be + /// replaced by InsInstr + /// \param InstrIdxForVirtReg - map of virtual register to instruction in + /// InsInstr that defines it + virtual void genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + + /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to + /// reduce critical path length. + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + + /// This is an architecture-specific helper function of reassociateOps. + /// Set special operand attributes for new instructions after reassociation. + virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const {} + + /// Return true when a target supports MachineCombiner. + virtual bool useMachineCombiner() const { return false; } + +protected: + /// Target-dependent implementation for foldMemoryOperand. + /// Target-independent code in foldMemoryOperand will + /// take care of adding a MachineMemOperand to the newly created instruction. + /// The instruction and any auxiliary instructions necessary will be inserted + /// at InsertPt. + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS = nullptr) const { + return nullptr; + } + + /// Target-dependent implementation for foldMemoryOperand. + /// Target-independent code in foldMemoryOperand will + /// take care of adding a MachineMemOperand to the newly created instruction. + /// The instruction and any auxiliary instructions necessary will be inserted + /// at InsertPt. + virtual MachineInstr *foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + LiveIntervals *LIS = nullptr) const { + return nullptr; + } + + /// \brief Target-dependent implementation of getRegSequenceInputs. + /// + /// \returns true if it is possible to build the equivalent + /// REG_SEQUENCE inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). + /// + /// \see TargetInstrInfo::getRegSequenceInputs. + virtual bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + return false; + } + + /// \brief Target-dependent implementation of getExtractSubregInputs. + /// + /// \returns true if it is possible to build the equivalent + /// EXTRACT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + /// + /// \see TargetInstrInfo::getExtractSubregInputs. + virtual bool getExtractSubregLikeInputs(const MachineInstr &MI, + unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + return false; + } + + /// \brief Target-dependent implementation of getInsertSubregInputs. 
+ /// + /// \returns true if it is possible to build the equivalent + /// INSERT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). + /// + /// \see TargetInstrInfo::getInsertSubregInputs. + virtual bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + return false; + } + +public: + /// getAddressSpaceForPseudoSourceKind - Given the kind of memory + /// (e.g. stack) the target returns the corresponding address space. + virtual unsigned + getAddressSpaceForPseudoSourceKind(PseudoSourceValue::PSVKind Kind) const { + return 0; + } + + /// unfoldMemoryOperand - Separate a single instruction which folded a load or + /// a store or a load and a store into two or more instruction. If this is + /// possible, returns true as well as the new instructions by reference. + virtual bool + unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, + bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl<MachineInstr *> &NewMIs) const { + return false; + } + + virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode *> &NewNodes) const { + return false; + } + + /// Returns the opcode of the would be new + /// instruction after load / store are unfolded from an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. If LoadRegIndex is non-null, it is filled in with the operand + /// index of the operand which will hold the register holding the loaded + /// value. + virtual unsigned + getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = nullptr) const { + return 0; + } + + /// This is used by the pre-regalloc scheduler to determine if two loads are + /// loading from the same base address. It should only return true if the base + /// pointers are the same and the only differences between the two addresses + /// are the offset. It also returns the offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + return false; + } + + /// This is a used by the pre-regalloc scheduler to determine (in conjunction + /// with areLoadsFromSameBasePtr) if two loads should be scheduled together. + /// On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1. + virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + return false; + } + + /// Get the base register and byte offset of an instruction that reads/writes + /// memory. + virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// Return true if the instruction contains a base register and offset. If + /// true, the function also sets the operand position in the instruction + /// for the base register and offset. 
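+  ///
+  /// For instance, if a target's base+immediate load is laid out as (purely
+  /// illustrative):
+  /// \code
+  ///   %dst = LDriw %base, <imm>
+  /// \endcode
+  /// the hook would set BasePos = 1, OffsetPos = 2 and return true.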
+ virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, + unsigned &BasePos, + unsigned &OffsetPos) const { + return false; + } + + /// If the instruction is an increment of a constant value, return the amount. + virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const { + return false; + } + + /// Returns true if the two given memory operations should be scheduled + /// adjacent. Note that you have to add: + /// DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + /// or + /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + /// to TargetPassConfig::createMachineScheduler() to have an effect. + virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, + MachineInstr &SecondLdSt, unsigned BaseReg2, + unsigned NumLoads) const { + llvm_unreachable("target did not implement shouldClusterMemOps()"); + } + + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + virtual bool + reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + return true; + } + + /// Insert a noop into the instruction stream at the specified point. + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// Return the noop instruction to use for a noop. + virtual void getNoop(MCInst &NopInst) const; + + /// Return true for post-incremented instructions. + virtual bool isPostIncrement(const MachineInstr &MI) const { return false; } + + /// Returns true if the instruction is already predicated. + virtual bool isPredicated(const MachineInstr &MI) const { return false; } + + /// Returns true if the instruction is a + /// terminator instruction that has not been predicated. + virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const; + + /// Returns true if MI is an unconditional tail call. + virtual bool isUnconditionalTailCall(const MachineInstr &MI) const { + return false; + } + + /// Returns true if the tail call can be made conditional on BranchCond. + virtual bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond, + const MachineInstr &TailCall) const { + return false; + } + + /// Replace the conditional branch in MBB with a conditional tail call. + virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB, + SmallVectorImpl<MachineOperand> &Cond, + const MachineInstr &TailCall) const { + llvm_unreachable("Target didn't implement replaceBranchWithTailCall!"); + } + + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. + virtual bool PredicateInstruction(MachineInstr &MI, + ArrayRef<MachineOperand> Pred) const; + + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + virtual bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const { + return false; + } + + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. + virtual bool DefinesPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred) const { + return false; + } + + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. 
+ virtual bool isPredicable(const MachineInstr &MI) const { + return MI.getDesc().isPredicable(); + } + + /// Return true if it's safe to move a machine + /// instruction that defines the specified register class. + virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + return true; + } + + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. + virtual bool isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + /// Measure the specified inline asm to determine an approximation of its + /// length. + virtual unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions before register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions before register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData *, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for by non-scheduling + /// passes. + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { + return nullptr; + } + + /// Provide a global flag for disabling the PreRA hazard recognizer that + /// targets may choose to honor. + bool usePreRAHazardRecognizer() const; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const { + return false; + } + + /// See if the comparison instruction can be converted + /// into something more efficient. E.g., on ARM most instructions can set the + /// flags register, obviating the need for a separate CMP. + virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const { + return false; + } + virtual bool optimizeCondBranch(MachineInstr &MI) const { return false; } + + /// Try to remove the load by folding it to a register operand at the use. + /// We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. 
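+  ///
+  /// On x86, for example, this is the hook that lets a sequence such as
+  /// \code
+  ///   %tmp = MOV32rm <mem>
+  ///   ...  = ADD32rr %other, %tmp
+  /// \endcode
+  /// be rewritten into the memory-operand form ADD32rm when the load has no
+  /// other users (the opcode names here are only indicative).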
+ virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + return nullptr; + } + + /// 'Reg' is known to be defined by a move immediate instruction, + /// try to fold the immediate into the use instruction. + /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true, + /// then the caller may assume that DefMI has been erased from its parent + /// block. The caller may assume that it will not be erased by this + /// function otherwise. + virtual bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { + return false; + } + + /// Return the number of u-operations the given machine + /// instruction will be decoded to on the target cpu. The itinerary's + /// IssueWidth is the number of microops that can be dispatched each + /// cycle. An instruction with zero microops takes no dispatch resources. + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr &MI) const; + + /// Return true for pseudo instructions that don't consume any + /// machine resources in their current form. These are common cases that the + /// scheduler should consider free, rather than conservatively handling them + /// as instructions with no itinerary. + bool isZeroCost(unsigned Opcode) const { + return Opcode <= TargetOpcode::COPY; + } + + virtual int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; + + /// Compute and return the use operand latency of a given pair of def and use. + /// In most cases, the static scheduling itinerary was enough to determine the + /// operand latency. But it may not be possible for instructions with variable + /// number of defs / uses. + /// + /// This is a raw interface to the itinerary that may be directly overridden + /// by a target. Use computeOperandLatency to get the best estimate of + /// latency. + virtual int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr &MI, + unsigned *PredCost = nullptr) const; + + virtual unsigned getPredicationCost(const MachineInstr &MI) const; + + virtual int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; + + /// Return the default expected latency for a def based on its opcode. + unsigned defaultDefLatency(const MCSchedModel &SchedModel, + const MachineInstr &DefMI) const; + + int computeDefOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI) const; + + /// Return true if this opcode has high latency to its result. + virtual bool isHighLatencyDef(int opc) const { return false; } + + /// Compute operand latency between a def of 'Reg' + /// and a use in the current loop. Return true if the target considered + /// it 'high'. This is used by optimization passes such as machine LICM to + /// determine whether it makes sense to hoist an instruction out even in a + /// high register pressure situation. 
+ virtual bool hasHighOperandLatency(const TargetSchedModel &SchedModel, + const MachineRegisterInfo *MRI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const { + return false; + } + + /// Compute operand latency of a def of 'Reg'. Return true + /// if the target considered it 'low'. + virtual bool hasLowDefLatency(const TargetSchedModel &SchedModel, + const MachineInstr &DefMI, + unsigned DefIdx) const; + + /// Perform target-specific instruction verification. + virtual bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + return true; + } + + /// Return the current execution domain and bit mask of + /// possible domains for instruction. + /// + /// Some micro-architectures have multiple execution domains, and multiple + /// opcodes that perform the same operation in different domains. For + /// example, the x86 architecture provides the por, orps, and orpd + /// instructions that all do the same thing. There is a latency penalty if a + /// register is written in one domain and read in another. + /// + /// This function returns a pair (domain, mask) containing the execution + /// domain of MI, and a bit mask of possible domains. The setExecutionDomain + /// function can be used to change the opcode to one of the domains in the + /// bit mask. Instructions whose execution domain can't be changed should + /// return a 0 mask. + /// + /// The execution domain numbers don't have any special meaning except domain + /// 0 is used for instructions that are not associated with any interesting + /// execution domain. + /// + virtual std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr &MI) const { + return std::make_pair(0, 0); + } + + /// Change the opcode of MI to execute in Domain. + /// + /// The bit (1 << Domain) must be set in the mask returned from + /// getExecutionDomain(MI). + virtual void setExecutionDomain(MachineInstr &MI, unsigned Domain) const {} + + /// Returns the preferred minimum clearance + /// before an instruction with an unwanted partial register update. + /// + /// Some instructions only write part of a register, and implicitly need to + /// read the other parts of the register. This may cause unwanted stalls + /// preventing otherwise unrelated instructions from executing in parallel in + /// an out-of-order CPU. + /// + /// For example, the x86 instruction cvtsi2ss writes its result to bits + /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so + /// the instruction needs to wait for the old value of the register to become + /// available: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// cvtsi2ss %rbx, %xmm0 + /// + /// In the code above, the cvtsi2ss instruction needs to wait for the addps + /// instruction before it can issue, even though the high bits of %xmm0 + /// probably aren't needed. + /// + /// This hook returns the preferred clearance before MI, measured in + /// instructions. Other defs of MI's operand OpNum are avoided in the last N + /// instructions before MI. It should only return a positive value for + /// unwanted dependencies. If the old bits of the defined register have + /// useful values, or if MI is determined to otherwise read the dependency, + /// the hook should return 0. + /// + /// The unwanted dependency may be handled by: + /// + /// 1. Allocating the same register for an MI def and use. That makes the + /// unwanted dependency identical to a required dependency. + /// + /// 2. 
Allocating a register for the def that has no defs in the previous N + /// instructions. + /// + /// 3. Calling breakPartialRegDependency() with the same arguments. This + /// allows the target to insert a dependency breaking instruction. + /// + virtual unsigned + getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no partial register dependency. + return 0; + } + + /// \brief Return the minimum clearance before an instruction that reads an + /// unused register. + /// + /// For example, AVX instructions may copy part of a register operand into + /// the unused high bits of the destination register. + /// + /// vcvtsi2sdq %rax, undef %xmm0, %xmm14 + /// + /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a + /// false dependence on any previous write to %xmm0. + /// + /// This hook works similarly to getPartialRegUpdateClearance, except that it + /// does not take an operand index. Instead sets \p OpNum to the index of the + /// unused register. + virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no undef register dependency. + return 0; + } + + /// Insert a dependency-breaking instruction + /// before MI to eliminate an unwanted dependency on OpNum. + /// + /// If it wasn't possible to avoid a def in the last N instructions before MI + /// (see getPartialRegUpdateClearance), this hook will be called to break the + /// unwanted dependency. + /// + /// On x86, an xorps instruction can be used as a dependency breaker: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// xorps %xmm0, %xmm0 + /// cvtsi2ss %rbx, %xmm0 + /// + /// An <imp-kill> operand should be added to MI if an instruction was + /// inserted. This ties the instructions together in the post-ra scheduler. + /// + virtual void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const {} + + /// Create machine specific model for scheduling. + virtual DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &) const { + return nullptr; + } + + /// Sometimes, it is possible for the target + /// to tell, even without aliasing information, that two MIs access different + /// memory addresses. This function returns true if two MIs access different + /// memory addresses and false otherwise. + /// + /// Assumes any physical registers used to compute addresses have the same + /// value for both instructions. (This is the most useful assumption for + /// post-RA scheduling.) + /// + /// See also MachineInstr::mayAlias, which is implemented on top of this + /// function. + virtual bool + areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, + AliasAnalysis *AA = nullptr) const { + assert((MIa.mayLoad() || MIa.mayStore()) && + "MIa must load from or modify a memory location"); + assert((MIb.mayLoad() || MIb.mayStore()) && + "MIb must load from or modify a memory location"); + return false; + } + + /// \brief Return the value to use for the MachineCSE's LookAheadLimit, + /// which is a heuristic used for CSE'ing phys reg defs. + virtual unsigned getMachineCSELookAheadLimit() const { + // The default lookahead is small to prevent unprofitable quadratic + // behavior. + return 5; + } + + /// Return an array that contains the ids of the target indices (used for the + /// TargetIndex machine operand) and their names. 
+ /// + /// MIR Serialization is able to serialize only the target indices that are + /// defined by this method. + virtual ArrayRef<std::pair<int, const char *>> + getSerializableTargetIndices() const { + return None; + } + + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + virtual std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned /*TF*/) const { + return std::make_pair(0u, 0u); + } + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the bitmask target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the MMO target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the MMO target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> + getSerializableMachineMemOperandTargetFlags() const { + return None; + } + + /// Determines whether \p Inst is a tail call instruction. Override this + /// method on targets that do not properly set MCID::Return and MCID::Call on + /// tail call instructions." + virtual bool isTailCall(const MachineInstr &Inst) const { + return Inst.isReturn() && Inst.isCall(); + } + + /// True if the instruction is bound to the top of its basic block and no + /// other instructions shall be inserted before it. This can be implemented + /// to prevent register allocator to insert spills before such instructions. + virtual bool isBasicBlockPrologue(const MachineInstr &MI) const { + return false; + } + + /// \brief Describes the number of instructions that it will take to call and + /// construct a frame for a given outlining candidate. + struct MachineOutlinerInfo { + /// Number of instructions to call an outlined function for this candidate. + unsigned CallOverhead; + + /// \brief Number of instructions to construct an outlined function frame + /// for this candidate. + unsigned FrameOverhead; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a call to this candidate. + unsigned CallConstructionID; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a frame for this candidate's outlined function. + unsigned FrameConstructionID; + + MachineOutlinerInfo() {} + MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead, + unsigned CallConstructionID, + unsigned FrameConstructionID) + : CallOverhead(CallOverhead), FrameOverhead(FrameOverhead), + CallConstructionID(CallConstructionID), + FrameConstructionID(FrameConstructionID) {} + }; + + /// \brief Returns a \p MachineOutlinerInfo struct containing target-specific + /// information for a set of outlining candidates. 
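+  ///
+  /// A target whose outlined calls are a single call instruction and whose
+  /// outlined frames need only a return might, for instance, report (sketch;
+  /// the Foo*ID constants are assumptions):
+  /// \code
+  ///   return MachineOutlinerInfo(/*CallOverhead=*/1, /*FrameOverhead=*/1,
+  ///                              /*CallConstructionID=*/FooCallID,
+  ///                              /*FrameConstructionID=*/FooFrameID);
+  /// \endcode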
+ virtual MachineOutlinerInfo getOutlininingCandidateInfo( + std::vector< + std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> + &RepeatedSequenceLocs) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningOverhead!"); + } + + /// Represents how an instruction should be mapped by the outliner. + /// \p Legal instructions are those which are safe to outline. + /// \p Illegal instructions are those which cannot be outlined. + /// \p Invisible instructions are instructions which can be outlined, but + /// shouldn't actually impact the outlining result. + enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; + + /// Returns how or if \p MI should be outlined. + virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningType!"); + } + + /// Insert a custom epilogue for outlined functions. + /// This may be empty, in which case no epilogue or return statement will be + /// emitted. + virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, + MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); + } + + /// Insert a call to an outlined function into the program. + /// Returns an iterator to the spot where we inserted the call. This must be + /// implemented by the target. + virtual MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); + } + + /// Insert a custom prologue for outlined functions. + /// This may be empty, in which case no prologue will be emitted. + virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, + MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); + } + + /// Return true if the function can safely be outlined from. + /// A function \p MF is considered safe for outlining if an outlined function + /// produced from instructions in F will produce a program which produces the + /// same output for any set of given inputs. + virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); + } + +private: + unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; + unsigned CatchRetOpcode; + unsigned ReturnOpcode; +}; + +/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { + using RegInfo = DenseMapInfo<unsigned>; + + static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), + RegInfo::getEmptyKey()); + } + + static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), + RegInfo::getTombstoneKey()); + } + + /// \brief Reuse getHashValue implementation from + /// std::pair<unsigned, unsigned>. 
+ static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { + std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg); + return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal); + } + + static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, + const TargetInstrInfo::RegSubRegPair &RHS) { + return RegInfo::isEqual(LHS.Reg, RHS.Reg) && + RegInfo::isEqual(LHS.SubReg, RHS.SubReg); + } +}; + +} // end namespace llvm + +#endif // LLVM_TARGET_TARGETINSTRINFO_H diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h new file mode 100644 index 0000000000000..0fa19d09e776a --- /dev/null +++ b/include/llvm/CodeGen/TargetLowering.h @@ -0,0 +1,3539 @@ +//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes how to lower LLVM code to machine code. This has two +/// main components: +/// +/// 1. Which ValueTypes are natively supported by the target. +/// 2. Which operations are supported for supported ValueTypes. +/// 3. Cost thresholds for alternative implementations of certain operations. +/// +/// In addition it has a few other components, like information about FP +/// immediates. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETLOWERING_H +#define LLVM_CODEGEN_TARGETLOWERING_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetCallingConv.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <iterator> +#include <map> +#include <string> +#include <utility> +#include <vector> + +namespace llvm { + +class BranchProbability; +class CCState; +class CCValAssign; +class Constant; +class FastISel; +class FunctionLoweringInfo; +class GlobalValue; +class IntrinsicInst; +struct KnownBits; +class LLVMContext; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineJumpTableInfo; +class MachineLoop; +class MachineRegisterInfo; +class MCContext; +class MCExpr; +class Module; +class TargetRegisterClass; +class TargetLibraryInfo; +class TargetRegisterInfo; +class Value; + +namespace Sched { + + enum Preference { + None, // No preference + Source, // Follow source order. 
+ RegPressure, // Scheduling for lowest register pressure. + Hybrid, // Scheduling for both latency and register pressure. + ILP, // Scheduling for ILP in low register pressure mode. + VLIW // Scheduling for VLIW targets. + }; + +} // end namespace Sched + +/// This base class for TargetLowering contains the SelectionDAG-independent +/// parts that can be used from the rest of CodeGen. +class TargetLoweringBase { +public: + /// This enum indicates whether operations are valid for a target, and if not, + /// what action should be used to make them valid. + enum LegalizeAction : uint8_t { + Legal, // The target natively supports this operation. + Promote, // This operation should be executed in a larger type. + Expand, // Try to expand this to other ops, otherwise use a libcall. + LibCall, // Don't try to expand this to other ops, always use a libcall. + Custom // Use the LowerOperation hook to implement custom lowering. + }; + + /// This enum indicates whether a types are legal for a target, and if not, + /// what action should be used to make them valid. + enum LegalizeTypeAction : uint8_t { + TypeLegal, // The target natively supports this type. + TypePromoteInteger, // Replace this integer with a larger one. + TypeExpandInteger, // Split this integer into two of half the size. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. + TypeExpandFloat, // Split this float into two of half the size. + TypeScalarizeVector, // Replace this one-element vector with its element. + TypeSplitVector, // Split this vector into two of half the size. + TypeWidenVector, // This vector should be widened into a larger vector. + TypePromoteFloat // Replace this float with a larger one. + }; + + /// LegalizeKind holds the legalization kind that needs to happen to EVT + /// in order to type-legalize it. + using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; + + /// Enum that describes how the target represents true/false values. + enum BooleanContent { + UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. + ZeroOrOneBooleanContent, // All bits zero except for bit 0. + ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. + }; + + /// Enum that describes what type of support for selects the target has. + enum SelectSupportKind { + ScalarValSelect, // The target supports scalar selects (ex: cmov). + ScalarCondVectorVal, // The target supports selects with a scalar condition + // and vector values (ex: cmov). + VectorMaskSelect // The target supports vector selects with a vector + // mask (ex: x86 blends). + }; + + /// Enum that specifies what an atomic load/AtomicRMWInst is expanded + /// to, if at all. Exists because different targets have different levels of + /// support for these atomic instructions, and also have different options + /// w.r.t. what they should expand to. + enum class AtomicExpansionKind { + None, // Don't expand the instruction. + LLSC, // Expand the instruction into loadlinked/storeconditional; used + // by ARM/AArch64. + LLOnly, // Expand the (load) instruction into just a load-linked, which has + // greater atomic guarantees than a normal load. + CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. + }; + + /// Enum that specifies when a multiplication should be expanded. + enum class MulExpansionKind { + Always, // Always expand the instruction. + OnlyLegalOrCustom, // Only expand when the resulting instructions are legal + // or custom. 
+ }; + + class ArgListEntry { + public: + Value *Val = nullptr; + SDValue Node = SDValue(); + Type *Ty = nullptr; + bool IsSExt : 1; + bool IsZExt : 1; + bool IsInReg : 1; + bool IsSRet : 1; + bool IsNest : 1; + bool IsByVal : 1; + bool IsInAlloca : 1; + bool IsReturned : 1; + bool IsSwiftSelf : 1; + bool IsSwiftError : 1; + uint16_t Alignment = 0; + + ArgListEntry() + : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), + IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), + IsSwiftSelf(false), IsSwiftError(false) {} + + void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx); + }; + using ArgListTy = std::vector<ArgListEntry>; + + virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, + ArgListTy &Args) const {}; + + static ISD::NodeType getExtendForContent(BooleanContent Content) { + switch (Content) { + case UndefinedBooleanContent: + // Extend by adding rubbish bits. + return ISD::ANY_EXTEND; + case ZeroOrOneBooleanContent: + // Extend by adding zero bits. + return ISD::ZERO_EXTEND; + case ZeroOrNegativeOneBooleanContent: + // Extend by copying the sign bit. + return ISD::SIGN_EXTEND; + } + llvm_unreachable("Invalid content kind"); + } + + /// NOTE: The TargetMachine owns TLOF. + explicit TargetLoweringBase(const TargetMachine &TM); + TargetLoweringBase(const TargetLoweringBase &) = delete; + TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; + virtual ~TargetLoweringBase() = default; + +protected: + /// \brief Initialize all of the actions to default values. + void initActions(); + +public: + const TargetMachine &getTargetMachine() const { return TM; } + + virtual bool useSoftFloat() const { return false; } + + /// Return the pointer type for the given address space, defaults to + /// the pointer type from the data layout. + /// FIXME: The default needs to be removed once all the code is updated. + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { + return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); + } + + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + + /// Return the type for operands of fence. + /// TODO: Let fence operands be of i32 type and remove this. + virtual MVT getFenceOperandTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + + /// EVT is not used in-tree, but is used by out-of-tree target. + /// A documentation for this function would be nice... + virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; + + EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; + + /// Returns the type to be used for the index operand of: + /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR + virtual MVT getVectorIdxTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + + virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { + return true; + } + + /// Return true if multiple condition registers are available. + bool hasMultipleConditionRegisters() const { + return HasMultipleConditionRegisters; + } + + /// Return true if the target has BitExtract instructions. + bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } + + /// Return the preferred vector type legalization action. 
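+  ///
+  /// A target that would rather widen short vectors than promote their
+  /// elements could override this roughly as follows (sketch; "Foo" and the
+  /// 128-bit register width are assumptions):
+  /// \code
+  ///   TargetLoweringBase::LegalizeTypeAction
+  ///   FooTargetLowering::getPreferredVectorAction(EVT VT) const {
+  ///     if (VT.getVectorNumElements() == 1)
+  ///       return TypeScalarizeVector;   // Keep the default for 1-element vectors.
+  ///     if (VT.getSizeInBits() < 128)
+  ///       return TypeWidenVector;       // Widen up to a full Foo vector register.
+  ///     return TargetLoweringBase::getPreferredVectorAction(VT);
+  ///   }
+  /// \endcode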
+ virtual TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const { + // The default action for one element vectors is to scalarize + if (VT.getVectorNumElements() == 1) + return TypeScalarizeVector; + // The default action for other vectors is to promote + return TypePromoteInteger; + } + + // There are two general methods for expanding a BUILD_VECTOR node: + // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle + // them together. + // 2. Build the vector on the stack and then load it. + // If this function returns true, then method (1) will be used, subject to + // the constraint that all of the necessary shuffles are legal (as determined + // by isShuffleMaskLegal). If this function returns false, then method (2) is + // always used. The vector type, and the number of defined values, are + // provided. + virtual bool + shouldExpandBuildVectorWithShuffles(EVT /* VT */, + unsigned DefinedValues) const { + return DefinedValues < 3; + } + + /// Return true if integer divide is usually cheaper than a sequence of + /// several shifts, adds, and multiplies for this target. + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } + + /// Return true if the target can handle a standalone remainder operation. + virtual bool hasStandaloneRem(EVT VT) const { + return true; + } + + /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). + virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { + // Default behavior is to replace SQRT(X) with X*RSQRT(X). + return false; + } + + /// Reciprocal estimate status values used by the functions below. + enum ReciprocalEstimate : int { + Unspecified = -1, + Disabled = 0, + Enabled = 1 + }; + + /// Return a ReciprocalEstimate enum value for a square root of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; + + /// Return a ReciprocalEstimate enum value for a division of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a square root of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a division of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Returns true if target has indicated at least one type should be bypassed. 
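+  ///
+  /// Targets populate the bypass width map from their TargetLowering
+  /// constructor, e.g. (assuming the protected addBypassSlowDiv() helper
+  /// declared elsewhere in this class):
+  /// \code
+  ///   // Prefer a 32-bit divide when the 64-bit divide is known to be slow.
+  ///   addBypassSlowDiv(64, 32);
+  /// \endcode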
+ bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } + + /// Returns map of slow types for division or remainder with corresponding + /// fast types + const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { + return BypassSlowDivWidths; + } + + /// Return true if Flow Control is an expensive operation that should be + /// avoided. + bool isJumpExpensive() const { return JumpIsExpensive; } + + /// Return true if selects are only cheaper than branches if the branch is + /// unlikely to be predicted right. + bool isPredictableSelectExpensive() const { + return PredictableSelectIsExpensive; + } + + /// If a branch or a select condition is skewed in one direction by more than + /// this factor, it is very likely to be predicted correctly. + virtual BranchProbability getPredictableBranchThreshold() const; + + /// Return true if the following transform is beneficial: + /// fold (conv (load x)) -> (load (conv*)x) + /// On architectures that don't natively support some vector loads + /// efficiently, casting the load to a smaller vector of larger types and + /// loading is more efficient, however, this can be undone by optimizations in + /// dag combiner. + virtual bool isLoadBitCastBeneficial(EVT LoadVT, + EVT BitcastVT) const { + // Don't do if we could do an indexed load on the original type, but not on + // the new one. + if (!LoadVT.isSimple() || !BitcastVT.isSimple()) + return true; + + MVT LoadMVT = LoadVT.getSimpleVT(); + + // Don't bother doing this if it's just going to be promoted again later, as + // doing so might interfere with other combines. + if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && + getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) + return false; + + return true; + } + + /// Return true if the following transform is beneficial: + /// (store (y (conv x)), y*)) -> (store x, (x*)) + virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const { + // Default to the same logic as loads. + return isLoadBitCastBeneficial(StoreVT, BitcastVT); + } + + /// Return true if it is expected to be cheaper to do a store of a non-zero + /// vector constant with the given size and type for the address space than to + /// store the individual scalar element constants. + virtual bool storeOfVectorConstantIsCheap(EVT MemVT, + unsigned NumElem, + unsigned AddrSpace) const { + return false; + } + + /// Allow store merging after legalization in addition to before legalization. + /// This may catch stores that do not exist earlier (eg, stores created from + /// intrinsics). + virtual bool mergeStoresAfterLegalization() const { return true; } + + /// Returns if it's reasonable to merge stores to MemVT size. + virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { + return true; + } + + /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. + virtual bool isCheapToSpeculateCttz() const { + return false; + } + + /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz. + virtual bool isCheapToSpeculateCtlz() const { + return false; + } + + /// \brief Return true if ctlz instruction is fast. + virtual bool isCtlzFast() const { + return false; + } + + /// Return true if it is safe to transform an integer-domain bitwise operation + /// into the equivalent floating-point operation. This should be set to true + /// if the target has IEEE-754-compliant fabs/fneg operations for the input + /// type. 
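+  /// For example, returning true here permits folds such as rewriting
+  /// (and (bitcast f32 X to i32), 0x7fffffff) as (bitcast (fabs X) to i32),
+  /// assuming fabs on this target only clears the sign bit.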
+ virtual bool hasBitPreservingFPLogic(EVT VT) const { + return false; + } + + /// \brief Return true if it is cheaper to split the store of a merged int val + /// from a pair of smaller values into multiple stores. + virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { + return false; + } + + /// \brief Return if the target supports combining a + /// chain like: + /// \code + /// %andResult = and %val1, #mask + /// %icmpResult = icmp %andResult, 0 + /// \endcode + /// into a single machine instruction of a form like: + /// \code + /// cc = test %register, #mask + /// \endcode + virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { + return false; + } + + /// Use bitwise logic to make pairs of compares more efficient. For example: + /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 + /// This should be true when it takes more than one instruction to lower + /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on + /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. + virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { + return false; + } + + /// Return the preferred operand type if the target has a quick way to compare + /// integer values of the given size. Assume that any legal integer type can + /// be compared efficiently. Targets may override this to allow illegal wide + /// types to return a vector type if there is support to compare that type. + virtual MVT hasFastEqualityCompare(unsigned NumBits) const { + MVT VT = MVT::getIntegerVT(NumBits); + return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; + } + + /// Return true if the target should transform: + /// (X & Y) == Y ---> (~X & Y) == 0 + /// (X & Y) != Y ---> (~X & Y) != 0 + /// + /// This may be profitable if the target has a bitwise and-not operation that + /// sets comparison flags. A target may want to limit the transformation based + /// on the type of Y or if Y is a constant. + /// + /// Note that the transform will not occur if Y is known to be a power-of-2 + /// because a mask and compare of a single bit can be handled by inverting the + /// predicate, for example: + /// (X & 8) == 8 ---> (X & 8) != 0 + virtual bool hasAndNotCompare(SDValue Y) const { + return false; + } + + /// Return true if the target has a bitwise and-not operation: + /// X = ~A & B + /// This can be used to simplify select or other instructions. + virtual bool hasAndNot(SDValue X) const { + // If the target has the more complex version of this operation, assume that + // it has this operation too. + return hasAndNotCompare(X); + } + + /// \brief Return true if the target wants to use the optimization that + /// turns ext(promotableInst1(...(promotableInstN(load)))) into + /// promotedInst1(...(promotedInstN(ext(load)))). + bool enableExtLdPromotion() const { return EnableExtLdPromotion; } + + /// Return true if the target can combine store(extractelement VectorTy, + /// Idx). + /// \p Cost[out] gives the cost of that transformation when this is true. + virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + return false; + } + + /// Return true if target supports floating point exceptions. + bool hasFloatingPointExceptions() const { + return HasFloatingPointExceptions; + } + + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. 
+  virtual bool enableAggressiveFMAFusion(EVT VT) const {
+    return false;
+  }
+
+  /// Return the ValueType of the result of SETCC operations.
+  virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+                                 EVT VT) const;
+
+  /// Return the ValueType for comparison libcalls. Comparison libcalls include
+  /// floating point comparison calls, and Ordered/Unordered check calls on
+  /// floating point numbers.
+  virtual
+  MVT::SimpleValueType getCmpLibcallReturnType() const;
+
+  /// For targets without i1 registers, this gives the nature of the high-bits
+  /// of boolean values held in types wider than i1.
+  ///
+  /// "Boolean values" are special true/false values produced by nodes like
+  /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
+  /// Not to be confused with general values promoted from i1. Some cpus
+  /// distinguish between vectors of boolean and scalars; the isVec parameter
+  /// selects between the two kinds. For example, on X86 a scalar boolean should
+  /// be zero extended from i1, while the elements of a vector of booleans
+  /// should be sign extended from i1.
+  ///
+  /// Some cpus also treat floating point types the same way as they treat
+  /// vectors instead of the way they treat scalars.
+  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
+    if (isVec)
+      return BooleanVectorContents;
+    return isFloat ? BooleanFloatContents : BooleanContents;
+  }
+
+  BooleanContent getBooleanContents(EVT Type) const {
+    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
+  }
+
+  /// Return target scheduling preference.
+  Sched::Preference getSchedulingPreference() const {
+    return SchedPreferenceInfo;
+  }
+
+  /// Some schedulers, e.g. hybrid, can switch to different scheduling
+  /// heuristics for different nodes. This function returns the preference (or
+  /// none) for the given node.
+  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
+    return Sched::None;
+  }
+
+  /// Return the register class that should be used for the specified value
+  /// type.
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+    const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+    assert(RC && "This value type is not natively supported!");
+    return RC;
+  }
+
+  /// Return the 'representative' register class for the specified value
+  /// type.
+  ///
+  /// The 'representative' register class is the largest legal super-reg
+  /// register class for the register class of the value type. For example, on
+  /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
+  /// register class is GR64 on x86_64.
+  virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+    const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
+    return RC;
+  }
+
+  /// Return the cost of the 'representative' register class for the specified
+  /// value type.
+  virtual uint8_t getRepRegClassCostFor(MVT VT) const {
+    return RepRegClassCostForVT[VT.SimpleTy];
+  }
+
+  /// Return true if the target has native support for the specified value type.
+  /// This means that it has a register that directly holds it without
+  /// promotions or expansions.
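+  /// For example, on a typical 64-bit target i64 is type-legal, while i128 is
+  /// not and must instead be expanded into a pair of i64 values.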
+ bool isTypeLegal(EVT VT) const { + assert(!VT.isSimple() || + (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); + return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; + } + + class ValueTypeActionImpl { + /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum + /// that indicates how instruction selection should deal with the type. + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; + + public: + ValueTypeActionImpl() { + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); + } + + LegalizeTypeAction getTypeAction(MVT VT) const { + return ValueTypeActions[VT.SimpleTy]; + } + + void setTypeAction(MVT VT, LegalizeTypeAction Action) { + ValueTypeActions[VT.SimpleTy] = Action; + } + }; + + const ValueTypeActionImpl &getValueTypeActions() const { + return ValueTypeActions; + } + + /// Return how we should legalize values of this type, either it is already + /// legal (return 'Legal') or we need to promote it to a larger type (return + /// 'Promote'), or we need to expand it into multiple registers of smaller + /// integer type (return 'Expand'). 'Custom' is not an option. + LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { + return getTypeConversion(Context, VT).first; + } + LegalizeTypeAction getTypeAction(MVT VT) const { + return ValueTypeActions.getTypeAction(VT); + } + + /// For types supported by the target, this is an identity function. For + /// types that must be promoted to larger types, this returns the larger type + /// to promote to. For integer types that are larger than the largest integer + /// register, this contains one step in the expansion to get to the smaller + /// register. For illegal floating point types, this returns the integer type + /// to transform to. + EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { + return getTypeConversion(Context, VT).second; + } + + /// For types supported by the target, this is an identity function. For + /// types that must be expanded (i.e. integer types that are larger than the + /// largest integer register or illegal floating point types), this returns + /// the largest legal type it will be expanded to. + EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { + assert(!VT.isVector()); + while (true) { + switch (getTypeAction(Context, VT)) { + case TypeLegal: + return VT; + case TypeExpandInteger: + VT = getTypeToTransformTo(Context, VT); + break; + default: + llvm_unreachable("Type is not legal nor is it to be expanded!"); + } + } + } + + /// Vector types are broken down into some number of legal first class types. + /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 + /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 + /// turns into 4 EVT::i32 values with both PPC and X86. + /// + /// This method returns the number of registers needed, and the VT for each + /// register. It also returns the VT and quantity of the intermediate values + /// before they are promoted/expanded. + unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const; + + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. 
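+  /// For illustration, under such a convention a legal v4i32 argument might be
+  /// broken down into four i32 values (IntermediateVT = i32,
+  /// NumIntermediates = 4, RegisterVT = i32) even though v4i32 itself is
+  /// type-legal.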
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
+  struct IntrinsicInfo {
+    unsigned opc = 0;     // target opcode
+    EVT memVT;            // memory VT
+
+    // value representing memory location
+    PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
+
+    int offset = 0;       // offset off of ptrVal
+    unsigned size = 0;    // the size of the memory location
+                          // (taken from memVT if zero)
+    unsigned align = 1;   // alignment
+
+    MachineMemOperand::Flags flags = MachineMemOperand::MONone;
+    IntrinsicInfo() = default;
+  };
+
+  /// Given an intrinsic, checks whether on this target the intrinsic will need
+  /// to map to a MemIntrinsicNode (touches memory). If this is the case, it
+  /// returns true and stores the intrinsic information into the IntrinsicInfo
+  /// that was passed to the function.
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
+                                  MachineFunction &,
+                                  unsigned /*Intrinsic*/) const {
+    return false;
+  }
+
+  /// Returns true if the target can instruction select the specified FP
+  /// immediate natively. If false, the legalizer will materialize the FP
+  /// immediate as a load from a constant pool.
+  virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
+    return false;
+  }
+
+  /// Targets can use this to indicate that they only support *some*
+  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
+  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
+  /// legal.
+  virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
+    return true;
+  }
+
+  /// Returns true if the operation can trap for the value type.
+  ///
+  /// VT must be a legal type. By default, we optimistically assume most
+  /// operations don't trap except for integer divide and remainder.
+  virtual bool canOpTrap(unsigned Op, EVT VT) const;
+
+  /// Similar to isShuffleMaskLegal. Targets can use this to indicate whether
+  /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with
+  /// a constant pool entry.
+  virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+                                      EVT /*VT*/) const {
+    return false;
+  }
+
+  /// Return how this operation should be treated: either it is legal, needs to
+  /// be promoted to a larger size, needs to be expanded to some other code
+  /// sequence, or the target has a custom expander for it.
+  LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
+    if (VT.isExtended()) return Expand;
+    // If a target-specific SDNode requires legalization, require the target
+    // to provide custom legalization for it.
+    if (Op >= array_lengthof(OpActions[0])) return Custom;
+    return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal with custom lowering. This is used to help guide high-level
+  /// lowering decisions.
+  bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Custom);
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal using promotion. This is used to help guide high-level lowering
+  /// decisions.
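+  /// For example, a DAG combine that wants to form a multiply could guard the
+  /// transform with a check along these lines (TLI being the current
+  /// TargetLowering instance):
+  /// \code
+  ///   if (!TLI.isOperationLegalOrPromote(ISD::MUL, VT))
+  ///     return SDValue(); // MUL on VT is neither legal nor promotable
+  /// \endcode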
+  bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Promote);
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal with custom lowering or using promotion. This is used to help
+  /// guide high-level lowering decisions.
+  bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Custom ||
+            getOperationAction(Op, VT) == Promote);
+  }
+
+  /// Return true if the operation uses custom lowering, regardless of whether
+  /// the type is legal or not.
+  bool isOperationCustom(unsigned Op, EVT VT) const {
+    return getOperationAction(Op, VT) == Custom;
+  }
+
+  /// Return true if lowering to a jump table is allowed.
+  bool areJTsAllowed(const Function *Fn) const {
+    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+      return false;
+
+    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+  }
+
+  /// Check whether the range [Low,High] fits in a machine word.
+  bool rangeFitsInWord(const APInt &Low, const APInt &High,
+                       const DataLayout &DL) const {
+    // FIXME: Using the pointer type doesn't seem ideal.
+    uint64_t BW = DL.getPointerSizeInBits();
+    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
+    return Range <= BW;
+  }
+
+  /// Return true if lowering to a jump table is suitable for a set of case
+  /// clusters which may contain \p NumCases cases and span a range of
+  /// \p Range values.
+  /// FIXME: This function checks the maximum table size and density, but the
+  /// minimum size is not checked. It would be nice if the minimum size check
+  /// were also folded into this function. Currently, the minimum size check is
+  /// performed in findJumpTable() in SelectionDAGBuilder and
+  /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+  bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
+                              uint64_t Range) const {
+    const bool OptForSize = SI->getParent()->getParent()->optForSize();
+    const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+    const unsigned MaxJumpTableSize =
+        OptForSize || getMaximumJumpTableSize() == 0
+            ? UINT_MAX
+            : getMaximumJumpTableSize();
+    // Check whether a range of clusters is dense enough for a jump table.
+    if (Range <= MaxJumpTableSize &&
+        (NumCases * 100 >= Range * MinDensity)) {
+      return true;
+    }
+    return false;
+  }
+
+  /// Return true if lowering to a bit test is suitable for a set of case
+  /// clusters which contains \p NumDests unique destinations, \p Low and
+  /// \p High as its lowest and highest case values, and expects \p NumCmps
+  /// case value comparisons. Check if the number of destinations, comparison
+  /// metric, and range are all suitable.
+  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
+                             const APInt &Low, const APInt &High,
+                             const DataLayout &DL) const {
+    // FIXME: I don't think NumCmps is the correct metric: a single case and a
+    // range of cases both require only one branch to lower. Just looking at the
+    // number of clusters and destinations should be enough to decide whether to
+    // build bit tests.
+
+    // To lower a range with bit tests, the range must fit the bitwidth of a
+    // machine word.
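+    // For example, with 64-bit pointers a cluster whose case values span
+    // [10, 70] (61 values) fits in one word, while one spanning [0, 100]
+    // (101 values) does not and is rejected below.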
+ if (!rangeFitsInWord(Low, High, DL)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might + // be cheaper, and for many destinations, splitting the range might be + // better. + return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); + } + + /// Return true if the specified operation is illegal on this target or + /// unlikely to be made legal with custom lowering. This is used to help guide + /// high-level lowering decisions. + bool isOperationExpand(unsigned Op, EVT VT) const { + return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); + } + + /// Return true if the specified operation is legal on this target. + bool isOperationLegal(unsigned Op, EVT VT) const { + return (VT == MVT::Other || isTypeLegal(VT)) && + getOperationAction(Op, VT) == Legal; + } + + /// Return how this load with extension should be treated: either it is legal, + /// needs to be promoted to a larger size, needs to be expanded to some other + /// code sequence, or the target has a custom expander for it. + LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, + EVT MemVT) const { + if (ValVT.isExtended() || MemVT.isExtended()) return Expand; + unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; + unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; + assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && + MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); + unsigned Shift = 4 * ExtType; + return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); + } + + /// Return true if the specified load with extension is legal on this target. + bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { + return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; + } + + /// Return true if the specified load with extension is legal or custom + /// on this target. + bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { + return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || + getLoadExtAction(ExtType, ValVT, MemVT) == Custom; + } + + /// Return how this store with truncation should be treated: either it is + /// legal, needs to be promoted to a larger size, needs to be expanded to some + /// other code sequence, or the target has a custom expander for it. + LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { + if (ValVT.isExtended() || MemVT.isExtended()) return Expand; + unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; + unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; + assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && + "Table isn't big enough!"); + return TruncStoreActions[ValI][MemI]; + } + + /// Return true if the specified store with truncation is legal on this + /// target. + bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; + } + + /// Return true if the specified store with truncation has solution on this + /// target. 
+ bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && + (getTruncStoreAction(ValVT, MemVT) == Legal || + getTruncStoreAction(ValVT, MemVT) == Custom); + } + + /// Return how the indexed load should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction + getIndexedLoadAction(unsigned IdxMode, MVT VT) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); + } + + /// Return true if the specified indexed load is legal on this target. + bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the indexed store should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction + getIndexedStoreAction(unsigned IdxMode, MVT VT) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); + } + + /// Return true if the specified indexed load is legal on this target. + bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the condition code should be treated: either it is legal, needs + /// to be expanded to some other code sequence, or the target has a custom + /// expander for it. + LegalizeAction + getCondCodeAction(ISD::CondCode CC, MVT VT) const { + assert((unsigned)CC < array_lengthof(CondCodeActions) && + ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && + "Table isn't big enough!"); + // See setCondCodeAction for how this is encoded. + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; + LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); + assert(Action != Promote && "Can't promote condition code!"); + return Action; + } + + /// Return true if the specified condition code is legal on this target. + bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { + return + getCondCodeAction(CC, VT) == Legal || + getCondCodeAction(CC, VT) == Custom; + } + + /// If the action for this operation is to promote, this method returns the + /// ValueType to promote to. + MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { + assert(getOperationAction(Op, VT) == Promote && + "This operation isn't promoted!"); + + // See if this has an explicit type specified. 
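+    // (i.e. an entry that was registered earlier via AddPromotedToType).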
+ std::map<std::pair<unsigned, MVT::SimpleValueType>, + MVT::SimpleValueType>::const_iterator PTTI = + PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); + if (PTTI != PromoteToType.end()) return PTTI->second; + + assert((VT.isInteger() || VT.isFloatingPoint()) && + "Cannot autopromote this type, add it with AddPromotedToType."); + + MVT NVT = VT; + do { + NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); + assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && + "Didn't find type to promote to!"); + } while (!isTypeLegal(NVT) || + getOperationAction(Op, NVT) == Promote); + return NVT; + } + + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM + /// operations except for the pointer size. If AllowUnknown is true, this + /// will return MVT::Other for types with no EVT counterpart (e.g. structs), + /// otherwise it will assert. + EVT getValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + // Lower scalar pointers to native pointer types. + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) + return getPointerTy(DL, PTy->getAddressSpace()); + + if (Ty->isVectorTy()) { + VectorType *VTy = cast<VectorType>(Ty); + Type *Elm = VTy->getElementType(); + // Lower vectors of pointers to native pointer types. + if (PointerType *PT = dyn_cast<PointerType>(Elm)) { + EVT PointerTy(getPointerTy(DL, PT->getAddressSpace())); + Elm = PointerTy.getTypeForEVT(Ty->getContext()); + } + + return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), + VTy->getNumElements()); + } + return EVT::getEVT(Ty, AllowUnknown); + } + + /// Return the MVT corresponding to this LLVM type. See getValueType. + MVT getSimpleValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); + } + + /// Return the desired alignment for ByVal or InAlloca aggregate function + /// arguments in the caller parameter area. This is the actual alignment, not + /// its logarithm. + virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; + + /// Return the type of registers that this ValueType will eventually require. + MVT getRegisterType(MVT VT) const { + assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.SimpleTy]; + } + + /// Return the type of registers that this ValueType will eventually require. + MVT getRegisterType(LLVMContext &Context, EVT VT) const { + if (VT.isSimple()) { + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; + } + if (VT.isVector()) { + EVT VT1; + MVT RegisterVT; + unsigned NumIntermediates; + (void)getVectorTypeBreakdown(Context, VT, VT1, + NumIntermediates, RegisterVT); + return RegisterVT; + } + if (VT.isInteger()) { + return getRegisterType(Context, getTypeToTransformTo(Context, VT)); + } + llvm_unreachable("Unsupported extended type!"); + } + + /// Return the number of registers that this ValueType will eventually + /// require. + /// + /// This is one for any types promoted to live in larger registers, but may be + /// more than one for types (like i64) that are split into pieces. For types + /// like i140, which are first promoted then expanded, it is the number of + /// registers needed to hold all the bits of the original type. For an i140 + /// on a 32 bit machine this means 5 registers. 
+ unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { + if (VT.isSimple()) { + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(NumRegistersForVT)); + return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; + } + if (VT.isVector()) { + EVT VT1; + MVT VT2; + unsigned NumIntermediates; + return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); + } + if (VT.isInteger()) { + unsigned BitWidth = VT.getSizeInBits(); + unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); + return (BitWidth + RegWidth - 1) / RegWidth; + } + llvm_unreachable("Unsupported extended type!"); + } + + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vector are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context senstive alignment requirements, where one + /// type has the alignment requirement of another type. + virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + + /// If true, then instruction selection should seek to shrink the FP constant + /// of the specified type to a smaller type in order to save space and / or + /// reduce runtime. + virtual bool ShouldShrinkFPConstant(EVT) const { return true; } + + // Return true if it is profitable to reduce the given load node to a smaller + // type. + // + // e.g. (i16 (trunc (i32 (load x))) -> i16 load x should be performed + virtual bool shouldReduceLoadWidth(SDNode *Load, + ISD::LoadExtType ExtTy, + EVT NewVT) const { + return true; + } + + /// When splitting a value of the specified type into parts, does the Lo + /// or Hi part come first? This usually follows the endianness, except + /// for ppcf128, where the Hi part always comes first. + bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { + return DL.isBigEndian() || VT == MVT::ppcf128; + } + + /// If true, the target has custom DAG combine transformations that it can + /// perform for the specified node. + bool hasTargetDAGCombine(ISD::NodeType NT) const { + assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); + } + + unsigned getGatherAllAliasesMaxDepth() const { + return GatherAllAliasesMaxDepth; + } + + /// Returns the size of the platform's va_list object. + virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { + return getPointerTy(DL).getSizeInBits(); + } + + /// \brief Get maximum # of store operations permitted for llvm.memset + /// + /// This function returns the maximum number of store operations permitted + /// to replace a call to llvm.memset. The value is set by the target at the + /// performance threshold for such a replacement. If OptSize is true, + /// return the limit for functions that have OptSize attribute. + unsigned getMaxStoresPerMemset(bool OptSize) const { + return OptSize ? 
MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
+  }
+
+  /// \brief Get maximum # of store operations permitted for llvm.memcpy
+  ///
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memcpy. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
+    return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
+  }
+
+  /// Get maximum # of load operations permitted for memcmp
+  ///
+  /// This function returns the maximum number of load operations permitted
+  /// to replace a call to memcmp. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
+    return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
+  }
+
+  /// \brief Get maximum # of store operations permitted for llvm.memmove
+  ///
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memmove. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxStoresPerMemmove(bool OptSize) const {
+    return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
+  }
+
+  /// \brief Determine if the target supports unaligned memory accesses.
+  ///
+  /// This function returns true if the target allows unaligned memory accesses
+  /// of the specified type in the given address space. If true, it also returns
+  /// whether the unaligned memory access is "fast" in the last argument by
+  /// reference. This is used, for example, in situations where an array
+  /// copy/move/set is converted to a sequence of store operations. Its use
+  /// helps to ensure that such replacements don't generate code that causes an
+  /// alignment error (trap) on the target machine.
+  virtual bool allowsMisalignedMemoryAccesses(EVT,
+                                              unsigned AddrSpace = 0,
+                                              unsigned Align = 1,
+                                              bool * /*Fast*/ = nullptr) const {
+    return false;
+  }
+
+  /// Return true if the target supports a memory access of this type for the
+  /// given address space and alignment. If the access is allowed, the optional
+  /// final parameter returns whether the access is also fast (as defined by the
+  /// target).
+  bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+                          unsigned AddrSpace = 0, unsigned Alignment = 1,
+                          bool *Fast = nullptr) const;
+
+  /// Returns the target-specific optimal type for load and store operations as
+  /// a result of memset, memcpy, and memmove lowering.
+  ///
+  /// If DstAlign is zero, it is safe to assume that the destination alignment
+  /// can satisfy any constraint. Similarly, if SrcAlign is zero, there is no
+  /// need to check it against an alignment requirement, probably because the
+  /// source does not need to be loaded. If 'IsMemset' is true, that means it's
+  /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
+  /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
+  /// does not need to be loaded. It returns EVT::Other if the type should be
+  /// determined using generic target-independent logic.
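+  /// For illustration only, a hypothetical target with fast 128-bit vector
+  /// stores might override this roughly as follows:
+  /// \code
+  ///   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+  ///                           unsigned SrcAlign, bool IsMemset,
+  ///                           bool ZeroMemset, bool MemcpyStrSrc,
+  ///                           MachineFunction &MF) const override {
+  ///     if (Size >= 16 && (DstAlign == 0 || DstAlign >= 16))
+  ///       return MVT::v2i64;
+  ///     return MVT::Other;
+  ///   }
+  /// \endcode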
+ virtual EVT getOptimalMemOpType(uint64_t /*Size*/, + unsigned /*DstAlign*/, unsigned /*SrcAlign*/, + bool /*IsMemset*/, + bool /*ZeroMemset*/, + bool /*MemcpyStrSrc*/, + MachineFunction &/*MF*/) const { + return MVT::Other; + } + + /// Returns true if it's safe to use load / store of the specified type to + /// expand memcpy / memset inline. + /// + /// This is mostly true for all types except for some special cases. For + /// example, on X86 targets without SSE2 f64 load / store are done with fldl / + /// fstpl which also does type conversion. Note the specified type doesn't + /// have to be legal as the hook is used before type legalization. + virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } + + /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. + bool usesUnderscoreSetJmp() const { + return UseUnderscoreSetJmp; + } + + /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. + bool usesUnderscoreLongJmp() const { + return UseUnderscoreLongJmp; + } + + /// Return lower limit for number of blocks in a jump table. + unsigned getMinimumJumpTableEntries() const; + + /// Return lower limit of the density in a jump table. + unsigned getMinimumJumpTableDensity(bool OptForSize) const; + + /// Return upper limit for number of entries in a jump table. + /// Zero if no limit. + unsigned getMaximumJumpTableSize() const; + + virtual bool isJumpTableRelative() const { + return TM.isPositionIndependent(); + } + + /// If a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + unsigned getStackPointerRegisterToSaveRestore() const { + return StackPointerRegisterToSaveRestore; + } + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + virtual unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + virtual unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; + } + + virtual bool needsFixedCatchObjects() const { + report_fatal_error("Funclet EH is not implemented for this target"); + } + + /// Returns the target's jmp_buf size in bytes (if never set, the default is + /// 200) + unsigned getJumpBufSize() const { + return JumpBufSize; + } + + /// Returns the target's jmp_buf alignment in bytes (if never set, the default + /// is 0) + unsigned getJumpBufAlignment() const { + return JumpBufAlignment; + } + + /// Return the minimum stack alignment of an argument. + unsigned getMinStackArgumentAlignment() const { + return MinStackArgumentAlignment; + } + + /// Return the minimum function alignment. + unsigned getMinFunctionAlignment() const { + return MinFunctionAlignment; + } + + /// Return the preferred function alignment. + unsigned getPrefFunctionAlignment() const { + return PrefFunctionAlignment; + } + + /// Return the preferred loop alignment. + virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { + return PrefLoopAlignment; + } + + /// If the target has a standard location for the stack protector guard, + /// returns the address of that location. Otherwise, returns nullptr. 
+ /// DEPRECATED: please override useLoadStackGuardNode and customize + /// LOAD_STACK_GUARD, or customize @llvm.stackguard(). + virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; + + /// Inserts necessary declarations for SSP (stack protection) purpose. + /// Should be used only when getIRStackGuard returns nullptr. + virtual void insertSSPDeclarations(Module &M) const; + + /// Return the variable that's previously inserted by insertSSPDeclarations, + /// if any, otherwise return nullptr. Should be used only when + /// getIRStackGuard returns nullptr. + virtual Value *getSDagStackGuard(const Module &M) const; + + /// If this function returns true, stack protection checks should XOR the + /// frame pointer (or whichever pointer is used to address locals) into the + /// stack guard value before checking it. getIRStackGuard must return nullptr + /// if this returns true. + virtual bool useStackGuardXorFP() const { return false; } + + /// If the target has a standard stack protection check function that + /// performs validation and error handling, returns the function. Otherwise, + /// returns nullptr. Must be previously inserted by insertSSPDeclarations. + /// Should be used only when getIRStackGuard returns nullptr. + virtual Value *getSSPStackGuardCheck(const Module &M) const; + +protected: + Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, + bool UseTLS) const; + +public: + /// Returns the target-specific address of the unsafe stack pointer. + virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; + + /// Returns the name of the symbol used to emit stack probes or the empty + /// string if not applicable. + virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { + return ""; + } + + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return false; + } + + /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we + /// are happy to sink it into basic blocks. + virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return isNoopAddrSpaceCast(SrcAS, DestAS); + } + + /// Return true if the pointer arguments to CI should be aligned by aligning + /// the object whose address is being passed. If so then MinSize is set to the + /// minimum size the object must be to be aligned and PrefAlign is set to the + /// preferred alignment. + virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, + unsigned & /*PrefAlign*/) const { + return false; + } + + //===--------------------------------------------------------------------===// + /// \name Helpers for TargetTransformInfo implementations + /// @{ + + /// Get the ISD node that corresponds to the Instruction class opcode. + int InstructionOpcodeToISD(unsigned Opcode) const; + + /// Estimate the cost of type-legalization and the legalized type. + std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; + + /// @} + + //===--------------------------------------------------------------------===// + /// \name Helpers for atomic expansion. + /// @{ + + /// Returns the maximum atomic operation size (in bits) supported by + /// the backend. Atomic operations greater than this size (as well + /// as ones that are not naturally aligned), will be expanded by + /// AtomicExpandPass into an __atomic_* library call. 
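+  /// For example, a 32-bit target without native 64-bit atomic instructions
+  /// would call \code setMaxAtomicSizeInBitsSupported(32); \endcode in its
+  /// constructor so that 64-bit atomic operations become __atomic_* calls.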
+ unsigned getMaxAtomicSizeInBitsSupported() const { + return MaxAtomicSizeInBitsSupported; + } + + /// Returns the size of the smallest cmpxchg or ll/sc instruction + /// the backend supports. Any smaller operations are widened in + /// AtomicExpandPass. + /// + /// Note that *unlike* operations above the maximum size, atomic ops + /// are still natively supported below the minimum; they just + /// require a more complex expansion. + unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } + + /// Whether the target supports unaligned atomic operations. + bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } + + /// Whether AtomicExpandPass should automatically insert fences and reduce + /// ordering for this atomic. This should be true for most architectures with + /// weak memory ordering. Defaults to false. + virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { + return false; + } + + /// Perform a load-linked operation on Addr, returning a "Value *" with the + /// corresponding pointee type. This may entail some non-trivial operations to + /// truncate or reconstruct types that will be illegal in the backend. See + /// ARMISelLowering for an example implementation. + virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + llvm_unreachable("Load linked unimplemented on this target"); + } + + /// Perform a store-conditional operation to Addr. Return the status of the + /// store. This should be 0 if the store succeeded, non-zero otherwise. + virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const { + llvm_unreachable("Store conditional unimplemented on this target"); + } + + /// Inserts in the IR a target-specific intrinsic specifying a fence. + /// It is called by AtomicExpandPass before expanding an + /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad + /// if shouldInsertFencesForAtomic returns true. + /// + /// Inst is the original atomic instruction, prior to other expansions that + /// may be performed. + /// + /// This function should either return a nullptr, or a pointer to an IR-level + /// Instruction*. Even complex fence sequences can be represented by a + /// single Instruction* through an intrinsic to be lowered later. + /// Backends should override this method to produce target-specific intrinsic + /// for their fences. + /// FIXME: Please note that the default implementation here in terms of + /// IR-level fences exists for historical/compatibility reasons and is + /// *unsound* ! Fences cannot, in general, be used to restore sequential + /// consistency. For example, consider the following example: + /// atomic<int> x = y = 0; + /// int r1, r2, r3, r4; + /// Thread 0: + /// x.store(1); + /// Thread 1: + /// y.store(1); + /// Thread 2: + /// r1 = x.load(); + /// r2 = y.load(); + /// Thread 3: + /// r3 = y.load(); + /// r4 = x.load(); + /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all + /// seq_cst. But if they are lowered to monotonic accesses, no amount of + /// IR-level fences can prevent it. 
+ /// @{ + virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const { + if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) + return Builder.CreateFence(Ord); + else + return nullptr; + } + + virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isAcquireOrStronger(Ord)) + return Builder.CreateFence(Ord); + else + return nullptr; + } + /// @} + + // Emits code that executes when the comparison result in the ll/sc + // expansion of a cmpxchg instruction is such that the store-conditional will + // not execute. This makes it possible to balance out the load-linked with + // a dedicated instruction, if desired. + // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would + // be unnecessarily held, except if clrex, inserted by this hook, is executed. + virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} + + /// Returns true if the given (atomic) store should be expanded by the + /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. + virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { + return false; + } + + /// Returns true if arguments should be sign-extended in lib calls. + virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + return IsSigned; + } + + /// Returns how the given (atomic) load should be expanded by the + /// IR-level AtomicExpand pass. + virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { + return AtomicExpansionKind::None; + } + + /// Returns true if the given atomic cmpxchg should be expanded by the + /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence + /// (through emitLoadLinked() and emitStoreConditional()). + virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { + return false; + } + + /// Returns how the IR-level AtomicExpand pass should expand the given + /// AtomicRMW, if at all. Default is to never expand. + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { + return AtomicExpansionKind::None; + } + + /// On some platforms, an AtomicRMW that never actually modifies the value + /// (such as fetch_add of 0) can be turned into a fence followed by an + /// atomic load. This may sound useless, but it makes it possible for the + /// processor to keep the cacheline shared, dramatically improving + /// performance. And such idempotent RMWs are useful for implementing some + /// kinds of locks, see for example (justification + benchmarks): + /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf + /// This method tries doing that transformation, returning the atomic load if + /// it succeeds, and nullptr otherwise. + /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo + /// another round of expansion. + virtual LoadInst * + lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { + return nullptr; + } + + /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, + /// SIGN_EXTEND, or ANY_EXTEND). + virtual ISD::NodeType getExtendForAtomicOps() const { + return ISD::ZERO_EXTEND; + } + + /// @} + + /// Returns true if we should normalize + /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and + /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely + /// that it saves us from materializing N0 and N1 in an integer register. 
+ /// Targets that are able to perform and/or on flags should return false here. + virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, + EVT VT) const { + // If a target has multiple condition registers, then it likely has logical + // operations on those registers. + if (hasMultipleConditionRegisters()) + return false; + // Only do the transform if the value won't be split into multiple + // registers. + LegalizeTypeAction Action = getTypeAction(Context, VT); + return Action != TypeExpandInteger && Action != TypeExpandFloat && + Action != TypeSplitVector; + } + + /// Return true if a select of constants (select Cond, C1, C2) should be + /// transformed into simple math ops with the condition value. For example: + /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 + virtual bool convertSelectOfConstantsToMath(EVT VT) const { + return false; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Configuration Methods - These methods should be invoked by + // the derived class constructor to configure this object for the target. + // +protected: + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. + void setBooleanContents(BooleanContent Ty) { + BooleanContents = Ty; + BooleanFloatContents = Ty; + } + + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. + void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { + BooleanContents = IntTy; + BooleanFloatContents = FloatTy; + } + + /// Specify how the target extends the result of a vector boolean value from a + /// vector of i1 to a wider type. See getBooleanContents. + void setBooleanVectorContents(BooleanContent Ty) { + BooleanVectorContents = Ty; + } + + /// Specify the target scheduling preference. + void setSchedulingPreference(Sched::Preference Pref) { + SchedPreferenceInfo = Pref; + } + + /// Indicate whether this target prefers to use _setjmp to implement + /// llvm.setjmp or the version without _. Defaults to false. + void setUseUnderscoreSetJmp(bool Val) { + UseUnderscoreSetJmp = Val; + } + + /// Indicate whether this target prefers to use _longjmp to implement + /// llvm.longjmp or the version without _. Defaults to false. + void setUseUnderscoreLongJmp(bool Val) { + UseUnderscoreLongJmp = Val; + } + + /// Indicate the minimum number of blocks to generate jump tables. + void setMinimumJumpTableEntries(unsigned Val); + + /// Indicate the maximum number of entries in jump tables. + /// Set to zero to generate unlimited jump tables. + void setMaximumJumpTableSize(unsigned); + + /// If set to a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + void setStackPointerRegisterToSaveRestore(unsigned R) { + StackPointerRegisterToSaveRestore = R; + } + + /// Tells the code generator that the target has multiple (allocatable) + /// condition registers that can be used to store the results of comparisons + /// for use by selects and conditional branches. With multiple condition + /// registers, the code generator will not aggressively sink comparisons into + /// the blocks of their users. + void setHasMultipleConditionRegisters(bool hasManyRegs = true) { + HasMultipleConditionRegisters = hasManyRegs; + } + + /// Tells the code generator that the target has BitExtract instructions. 
+  /// The code generator will aggressively sink "shift"s into the blocks of
+  /// their users if the users will generate "and" instructions which can be
+  /// combined with "shift" to BitExtract instructions.
+  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
+    HasExtractBitsInsn = hasExtractInsn;
+  }
+
+  /// Tells the code generator not to expand logic operations on comparison
+  /// predicates into separate sequences that increase the amount of flow
+  /// control.
+  void setJumpIsExpensive(bool isExpensive = true);
+
+  /// Tells the code generator that this target supports floating point
+  /// exceptions and cares about preserving floating point exception behavior.
+  void setHasFloatingPointExceptions(bool FPExceptions = true) {
+    HasFloatingPointExceptions = FPExceptions;
+  }
+
+  /// Tells the code generator which bitwidths to bypass.
+  void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
+    BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
+  }
+
+  /// Add the specified register class as an available regclass for the
+  /// specified value type. This indicates the selector can handle values of
+  /// that class natively.
+  void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
+    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
+    RegClassForVT[VT.SimpleTy] = RC;
+  }
+
+  /// Return the largest legal super-reg register class of the register class
+  /// for the specified type and its associated "cost".
+  virtual std::pair<const TargetRegisterClass *, uint8_t>
+  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
+
+  /// Once all of the register classes are added, this allows us to compute
+  /// derived properties we expose.
+  void computeRegisterProperties(const TargetRegisterInfo *TRI);
+
+  /// Indicate that the specified operation does not work with the specified
+  /// type and indicate what to do about it. Note that VT may refer to either
+  /// the type of a result or that of an operand of Op.
+  void setOperationAction(unsigned Op, MVT VT,
+                          LegalizeAction Action) {
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    OpActions[(unsigned)VT.SimpleTy][Op] = Action;
+  }
+
+  /// Indicate that the specified load with extension does not work with the
+  /// specified type and indicate what to do about it.
+  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
+                        LegalizeAction Action) {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
+           MemVT.isValid() && "Table isn't big enough!");
+    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
+    unsigned Shift = 4 * ExtType;
+    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
+    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
+  }
+
+  /// Indicate that the specified truncating store does not work with the
+  /// specified type and indicate what to do about it.
+  void setTruncStoreAction(MVT ValVT, MVT MemVT,
+                           LegalizeAction Action) {
+    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
+    TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
+  }
+
+  /// Indicate that the specified indexed load does or does not work with the
+  /// specified type and indicate what to do about it.
+ /// + /// NOTE: All indexed mode loads are initialized to Expand in + /// TargetLowering.cpp + void setIndexedLoadAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + // Load action are kept in the upper half. + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; + } + + /// Indicate that the specified indexed store does or does not work with the + /// specified type and indicate what to do about it. + /// + /// NOTE: All indexed mode stores are initialized to Expand in + /// TargetLowering.cpp + void setIndexedStoreAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + // Store action are kept in the lower half. + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); + } + + /// Indicate that the specified condition code is or isn't supported on the + /// target and indicate what to do about it. + void setCondCodeAction(ISD::CondCode CC, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && + "Table isn't big enough!"); + assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); + /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit + /// value and the upper 29 bits index into the second dimension of the array + /// to select what 32-bit value to use. + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); + CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; + } + + /// If Opc/OrigVT is specified as being promoted, the promotion code defaults + /// to trying a larger integer/fp until it can find one that works. If that + /// default is insufficient, this method can be used by the target to override + /// the default. + void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; + } + + /// Convenience method to set an operation to Promote and specify the type + /// in a single call. + void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + setOperationAction(Opc, OrigVT, Promote); + AddPromotedToType(Opc, OrigVT, DestVT); + } + + /// Targets should invoke this method for each target independent node that + /// they want to provide a custom DAG combiner for by implementing the + /// PerformDAGCombine virtual method. + void setTargetDAGCombine(ISD::NodeType NT) { + assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); + } + + /// Set the target's required jmp_buf buffer size (in bytes); default is 200 + void setJumpBufSize(unsigned Size) { + JumpBufSize = Size; + } + + /// Set the target's required jmp_buf buffer alignment (in bytes); default is + /// 0 + void setJumpBufAlignment(unsigned Align) { + JumpBufAlignment = Align; + } + + /// Set the target's minimum function alignment (in log2(bytes)) + void setMinFunctionAlignment(unsigned Align) { + MinFunctionAlignment = Align; + } + + /// Set the target's preferred function alignment. 
This should be set if + /// there is a performance benefit to higher-than-minimum alignment (in + /// log2(bytes)) + void setPrefFunctionAlignment(unsigned Align) { + PrefFunctionAlignment = Align; + } + + /// Set the target's preferred loop alignment. Default alignment is zero, it + /// means the target does not care about loop alignment. The alignment is + /// specified in log2(bytes). The target may also override + /// getPrefLoopAlignment to provide per-loop values. + void setPrefLoopAlignment(unsigned Align) { + PrefLoopAlignment = Align; + } + + /// Set the minimum stack alignment of an argument (in log2(bytes)). + void setMinStackArgumentAlignment(unsigned Align) { + MinStackArgumentAlignment = Align; + } + + /// Set the maximum atomic operation size supported by the + /// backend. Atomic operations greater than this size (as well as + /// ones that are not naturally aligned), will be expanded by + /// AtomicExpandPass into an __atomic_* library call. + void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { + MaxAtomicSizeInBitsSupported = SizeInBits; + } + + /// Sets the minimum cmpxchg or ll/sc size supported by the backend. + void setMinCmpXchgSizeInBits(unsigned SizeInBits) { + MinCmpXchgSizeInBits = SizeInBits; + } + + /// Sets whether unaligned atomic operations are supported. + void setSupportsUnalignedAtomics(bool UnalignedSupported) { + SupportsUnalignedAtomics = UnalignedSupported; + } + +public: + //===--------------------------------------------------------------------===// + // Addressing mode description hooks (used by LSR etc). + // + + /// CodeGenPrepare sinks address calculations into the same BB as Load/Store + /// instructions reading the address. This allows as much computation as + /// possible to be done in the address mode for that operand. This hook lets + /// targets also pass back when this should be done on intrinsics which + /// load/store. + virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, + SmallVectorImpl<Value*> &/*Ops*/, + Type *&/*AccessTy*/) const { + return false; + } + + /// This represents an addressing mode of: + /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + /// If BaseGV is null, there is no BaseGV. + /// If BaseOffs is zero, there is no base offset. + /// If HasBaseReg is false, there is no base register. + /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with + /// no scale. + struct AddrMode { + GlobalValue *BaseGV = nullptr; + int64_t BaseOffs = 0; + bool HasBaseReg = false; + int64_t Scale = 0; + AddrMode() = default; + }; + + /// Return true if the addressing mode represented by AM is legal for this + /// target, for a load/store of the specified type. + /// + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. TODO: Handle + /// pre/postinc as well. + /// + /// If the address space cannot be determined, it will be -1. + /// + /// TODO: Remove default argument + virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AddrSpace, + Instruction *I = nullptr) const; + + /// \brief Return the cost of the scaling factor used in the addressing mode + /// represented by AM for this target, for a load/store of the specified type. + /// + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, it returns a negative value. + /// TODO: Handle pre/postinc as well. 
+ /// TODO: Remove default argument
+ virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS = 0) const {
+ // Default: assume that any scaling factor used in a legal AM is free.
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
+ return 0;
+ return -1;
+ }
+
+ /// Return true if the specified immediate is a legal icmp immediate, that is
+ /// the target has icmp instructions which can compare a register against the
+ /// immediate without having to materialize the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t) const {
+ return true;
+ }
+
+ /// Return true if the specified immediate is a legal add immediate, that is
+ /// the target has add instructions which can add a register with the
+ /// immediate without having to materialize the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t) const {
+ return true;
+ }
+
+ /// Return true if it's significantly cheaper to shift a vector by a uniform
+ /// scalar than by an amount which will vary across each lane. On x86, for
+ /// example, there is a "psllw" instruction for the former case, but no simple
+ /// instruction for a general "a << b" operation on vectors.
+ virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
+ return false;
+ }
+
+ /// Returns true if the opcode is a commutative binary operation.
+ virtual bool isCommutativeBinOp(unsigned Opcode) const {
+ // FIXME: This should get its info from the td file.
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::MUL:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ return true;
+ default: return false;
+ }
+ }
+
+ /// Return true if it's free to truncate a value of type FromTy to type
+ /// ToTy. e.g. on x86 it's free to truncate an i32 value in register EAX to
+ /// i16 by referencing its sub-register AX.
+ /// Targets must return false when FromTy <= ToTy.
+ virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
+ return false;
+ }
+
+ /// Return true if a truncation from FromTy to ToTy is permitted when deciding
+ /// whether a call is in tail position. Typically this means that both results
+ /// would be assigned to the same register or stack slot, but it could mean
+ /// the target performs adequate checks of its own before proceeding with the
+ /// tail call. Targets must return false when FromTy <= ToTy.
+ virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
+ return false;
+ }
+
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
+ return false;
+ }
+
+ virtual bool isProfitableToHoist(Instruction *I) const { return true; }
+
+ /// Return true if the extension represented by \p I is free.
+ /// Unlike the is[Z|FP]ExtFree family, which is based on types,
+ /// this method can use the context provided by \p I to decide
+ /// whether or not \p I is free.
+ /// This method extends the behavior of the is[Z|FP]ExtFree family.
+ /// In other words, if is[Z|FP]ExtFree returns true, then this method
+ /// returns true as well. The converse is not true.
+ /// The target can perform the adequate checks by overriding isExtFreeImpl.
+ /// \pre \p I must be a sign, zero, or fp extension.
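+ ///
+ /// For illustration, given a hypothetical instruction
+ /// \code
+ /// %e = zext i8 %x to i32
+ /// \endcode
+ /// this returns true whenever isZExtFree(i8, i32) does, and otherwise defers
+ /// to isExtFreeImpl for a context-sensitive answer.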
+ bool isExtFree(const Instruction *I) const { + switch (I->getOpcode()) { + case Instruction::FPExt: + if (isFPExtFree(EVT::getEVT(I->getType()), + EVT::getEVT(I->getOperand(0)->getType()))) + return true; + break; + case Instruction::ZExt: + if (isZExtFree(I->getOperand(0)->getType(), I->getType())) + return true; + break; + case Instruction::SExt: + break; + default: + llvm_unreachable("Instruction is not an extension"); + } + return isExtFreeImpl(I); + } + + /// Return true if \p Load and \p Ext can form an ExtLoad. + /// For example, in AArch64 + /// %L = load i8, i8* %ptr + /// %E = zext i8 %L to i32 + /// can be lowered into one load instruction + /// ldrb w0, [x0] + bool isExtLoad(const LoadInst *Load, const Instruction *Ext, + const DataLayout &DL) const { + EVT VT = getValueType(DL, Ext->getType()); + EVT LoadVT = getValueType(DL, Load->getType()); + + // If the load has other users and the truncate is not free, the ext + // probably isn't free. + if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && + !isTruncateFree(Ext->getType(), Load->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(Ext)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + + return isLoadExtLegal(LType, VT, LoadVT); + } + + /// Return true if any actual instruction that defines a value of type FromTy + /// implicitly zero-extends the value to ToTy in the result register. + /// + /// The function should return true when it is likely that the truncate can + /// be freely folded with an instruction defining a value of FromTy. If + /// the defining instruction is unknown (because you're looking at a + /// function argument, PHI, etc.) then the target may require an + /// explicit truncate, which is not necessarily free, but this function + /// does not deal with those cases. + /// Targets must return false when FromTy >= ToTy. + virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { + return false; + } + + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { + return false; + } + + /// Return true if the target supplies and combines to a paired load + /// two loaded values of type LoadedType next to each other in memory. + /// RequiredAlignment gives the minimal alignment constraints that must be met + /// to be able to select this paired load. + /// + /// This information is *not* used to generate actual paired loads, but it is + /// used to generate a sequence of loads that is easier to combine into a + /// paired load. + /// For instance, something like this: + /// a = load i64* addr + /// b = trunc i64 a to i32 + /// c = lshr i64 a, 32 + /// d = trunc i64 c to i32 + /// will be optimized into: + /// b = load i32* addr1 + /// d = load i32* addr2 + /// Where addr1 = addr2 +/- sizeof(i32). + /// + /// In other words, unless the target performs a post-isel load combining, + /// this information should not be provided because it will generate more + /// loads. + virtual bool hasPairedLoad(EVT /*LoadedType*/, + unsigned & /*RequiredAlignment*/) const { + return false; + } + + /// \brief Get the maximum supported factor for interleaved memory accesses. + /// Default to be the minimum interleave factor: 2. + virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } + + /// \brief Lower an interleaved load to target specific intrinsics. Return + /// true on success. + /// + /// \p LI is the vector load instruction. 
+ /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. + /// \p Indices is the corresponding indices for each shufflevector. + /// \p Factor is the interleave factor. + virtual bool lowerInterleavedLoad(LoadInst *LI, + ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, + unsigned Factor) const { + return false; + } + + /// \brief Lower an interleaved store to target specific intrinsics. Return + /// true on success. + /// + /// \p SI is the vector store instruction. + /// \p SVI is the shufflevector to RE-interleave the stored vector. + /// \p Factor is the interleave factor. + virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const { + return false; + } + + /// Return true if zero-extending the specific node Val to type VT2 is free + /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or + /// because it's folded such as X86 zero-extending loads). + virtual bool isZExtFree(SDValue Val, EVT VT2) const { + return isZExtFree(Val.getValueType(), VT2); + } + + /// Return true if an fpext operation is free (for instance, because + /// single-precision floating-point numbers are implicitly extended to + /// double-precision). + virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { + assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && + "invalid fpext types"); + return false; + } + + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. + virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { + assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && + "invalid fpext types"); + return isFPExtFree(DestVT, SrcVT); + } + + /// Return true if folding a vector load into ExtVal (a sign, zero, or any + /// extend node) is profitable. + virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } + + /// Return true if an fneg operation is free to the point where it is never + /// worthwhile to replace it with a bitwise operation. + virtual bool isFNegFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return false; + } + + /// Return true if an fabs operation is free to the point where it is never + /// worthwhile to replace it with a bitwise operation. + virtual bool isFAbsFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return false; + } + + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + /// + /// NOTE: This may be called before legalization on types for which FMAs are + /// not legal, but should return true if those types will eventually legalize + /// to types that support FMAs. After legalization, it will only be called on + /// types that support FMAs (via Legal or Custom actions) + virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { + return false; + } + + /// Return true if it's profitable to narrow operations of type VT1 to + /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from + /// i32 to i16. + virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { + return false; + } + + /// \brief Return true if it is beneficial to convert a load of a constant to + /// just the constant itself. 
+ /// On some targets it might be more efficient to use a combination of + /// arithmetic instructions to materialize the constant instead of loading it + /// from a constant pool. + virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + return false; + } + + /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type + /// from this source type with this index. This is needed because + /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of + /// the first element, and only the target knows which lowering is cheap. + virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + return false; + } + + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR + // even if the vector itself has multiple uses. + virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { + return false; + } + + //===--------------------------------------------------------------------===// + // Runtime Library hooks + // + + /// Rename the default libcall routine name for the specified libcall. + void setLibcallName(RTLIB::Libcall Call, const char *Name) { + LibcallRoutineNames[Call] = Name; + } + + /// Get the libcall routine name for the specified libcall. + const char *getLibcallName(RTLIB::Libcall Call) const { + return LibcallRoutineNames[Call]; + } + + /// Override the default CondCode to be used to test the result of the + /// comparison libcall against zero. + void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { + CmpLibcallCCs[Call] = CC; + } + + /// Get the CondCode that's to be used to test the result of the comparison + /// libcall against zero. + ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { + return CmpLibcallCCs[Call]; + } + + /// Set the CallingConv that should be used for the specified libcall. + void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { + LibcallCallingConvs[Call] = CC; + } + + /// Get the CallingConv that should be used for the specified libcall. + CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { + return LibcallCallingConvs[Call]; + } + + /// Execute target specific actions to finalize target lowering. + /// This is used to set extra flags in MachineFrameInformation and freezing + /// the set of reserved registers. + /// The default implementation just freezes the set of reserved registers. + virtual void finalizeLowering(MachineFunction &MF) const; + +private: + const TargetMachine &TM; + + /// Tells the code generator that the target has multiple (allocatable) + /// condition registers that can be used to store the results of comparisons + /// for use by selects and conditional branches. With multiple condition + /// registers, the code generator will not aggressively sink comparisons into + /// the blocks of their users. + bool HasMultipleConditionRegisters; + + /// Tells the code generator that the target has BitExtract instructions. + /// The code generator will aggressively sink "shift"s into the blocks of + /// their users if the users will generate "and" instructions which can be + /// combined with "shift" to BitExtract instructions. + bool HasExtractBitsInsn; + + /// Tells the code generator to bypass slow divide or remainder + /// instructions. For example, BypassSlowDivWidths[32,8] tells the code + /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer + /// div/rem when the operands are positive and less than 256. 
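+ ///
+ /// For illustration, a hypothetical target with a slow 64-bit divider could
+ /// populate this from its constructor via
+ /// \code
+ /// addBypassSlowDiv(64, 32); // try a 32-bit unsigned div/rem first
+ /// \endcode
+ /// The widths shown are placeholders; real values are target-specific.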
+ DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; + + /// Tells the code generator that it shouldn't generate extra flow control + /// instructions and should attempt to combine flow control instructions via + /// predication. + bool JumpIsExpensive; + + /// Whether the target supports or cares about preserving floating point + /// exception behavior. + bool HasFloatingPointExceptions; + + /// This target prefers to use _setjmp to implement llvm.setjmp. + /// + /// Defaults to false. + bool UseUnderscoreSetJmp; + + /// This target prefers to use _longjmp to implement llvm.longjmp. + /// + /// Defaults to false. + bool UseUnderscoreLongJmp; + + /// Information about the contents of the high-bits in boolean values held in + /// a type wider than i1. See getBooleanContents. + BooleanContent BooleanContents; + + /// Information about the contents of the high-bits in boolean values held in + /// a type wider than i1. See getBooleanContents. + BooleanContent BooleanFloatContents; + + /// Information about the contents of the high-bits in boolean vector values + /// when the element type is wider than i1. See getBooleanContents. + BooleanContent BooleanVectorContents; + + /// The target scheduling preference: shortest possible total cycles or lowest + /// register usage. + Sched::Preference SchedPreferenceInfo; + + /// The size, in bytes, of the target's jmp_buf buffers + unsigned JumpBufSize; + + /// The alignment, in bytes, of the target's jmp_buf buffers + unsigned JumpBufAlignment; + + /// The minimum alignment that any argument on the stack needs to have. + unsigned MinStackArgumentAlignment; + + /// The minimum function alignment (used when optimizing for size, and to + /// prevent explicitly provided alignment from leading to incorrect code). + unsigned MinFunctionAlignment; + + /// The preferred function alignment (used when alignment unspecified and + /// optimizing for speed). + unsigned PrefFunctionAlignment; + + /// The preferred loop alignment. + unsigned PrefLoopAlignment; + + /// Size in bits of the maximum atomics size the backend supports. + /// Accesses larger than this will be expanded by AtomicExpandPass. + unsigned MaxAtomicSizeInBitsSupported; + + /// Size in bits of the minimum cmpxchg or ll/sc operation the + /// backend supports. + unsigned MinCmpXchgSizeInBits; + + /// This indicates if the target supports unaligned atomic operations. + bool SupportsUnalignedAtomics; + + /// If set to a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + unsigned StackPointerRegisterToSaveRestore; + + /// This indicates the default register class to use for each ValueType the + /// target supports natively. + const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; + unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; + MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; + + /// This indicates the "representative" register class to use for each + /// ValueType the target supports natively. This information is used by the + /// scheduler to track register pressure. By default, the representative + /// register class is the largest legal super-reg register class of the + /// register class of the specified type. e.g. On x86, i8, i16, and i32's + /// representative class would be GR32. + const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; + + /// This indicates the "cost" of the "representative" register class for each + /// ValueType. 
The cost is used by the scheduler to approximate register
+ /// pressure.
+ uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
+
+ /// For any value types we are promoting or expanding, this contains the value
+ /// type that we are changing to. For Expanded types, this contains one step
+ /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
+ /// (e.g. i64 -> i16). For types natively supported by the system, this holds
+ /// the same type (e.g. i32 -> i32).
+ MVT TransformToType[MVT::LAST_VALUETYPE];
+
+ /// For each operation and each value type, keep a LegalizeAction that
+ /// indicates how instruction selection should deal with the operation. Most
+ /// operations are Legal (aka, supported natively by the target), but
+ /// operations that are not should be described. Note that operations on
+ /// non-legal value types are not described here.
+ LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
+
+ /// For each load extension type and each value type, keep a LegalizeAction
+ /// that indicates how instruction selection should deal with a load of a
+ /// specific value type and extension type. Uses 4 bits to store the action
+ /// for each of the 4 load ext types.
+ uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
+ /// For each value type pair keep a LegalizeAction that indicates whether a
+ /// truncating store of a specific value type and truncating type is legal.
+ LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
+ /// For each indexed mode and each value type, keep a pair of LegalizeAction
+ /// that indicates how instruction selection should deal with the load /
+ /// store.
+ ///
+ /// The first dimension is the value_type for the reference. The second
+ /// dimension represents the various modes for load store.
+ uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
+
+ /// For each condition code (ISD::CondCode) keep a LegalizeAction that
+ /// indicates how instruction selection should deal with the condition code.
+ ///
+ /// Because each CC action takes up 4 bits, we need to have the array size be
+ /// large enough to fit all of the value types. This can be done by rounding
+ /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
+ uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
+
+protected:
+ ValueTypeActionImpl ValueTypeActions;
+
+private:
+ LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
+
+ /// Targets can specify ISD nodes that they would like PerformDAGCombine
+ /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
+ /// array.
+ unsigned char
+ TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
+
+ /// For operations that must be promoted to a specific type, this holds the
+ /// destination type. This map should be sparse, so don't hold it as an
+ /// array.
+ ///
+ /// Targets add entries to this map with AddPromotedToType(..), clients access
+ /// this with getTypeToPromoteTo(..).
+ std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
+ PromoteToType;
+
+ /// Stores the name of each libcall.
+ const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL];
+
+ /// The ISD::CondCode that should be used to test the result of each of the
+ /// comparison libcalls against zero.
+ ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
+
+ /// Stores the CallingConv that should be used for each libcall.
+ CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
+
+protected:
+ /// Return true if the extension represented by \p I is free.
+ /// \pre \p I is a sign, zero, or fp extension and
+ /// is[Z|FP]ExtFree of the related types is not true.
+ virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
+
+ /// Depth that GatherAllAliases should continue looking for chain
+ /// dependencies when trying to find a more preferable chain. As an
+ /// approximation, this should be more than the number of consecutive stores
+ /// expected to be merged.
+ unsigned GatherAllAliasesMaxDepth;
+
+ /// \brief Specify maximum number of store instructions per memset call.
+ ///
+ /// When lowering \@llvm.memset this field specifies the maximum number of
+ /// store operations that may be substituted for the call to memset. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memset will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
+ /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
+ /// store. This only applies to setting a constant array of a constant size.
+ unsigned MaxStoresPerMemset;
+
+ /// Maximum number of store operations that may be substituted for the call
+ /// to memset, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemsetOptSize;
+
+ /// \brief Specify maximum number of store instructions per memcpy call.
+ ///
+ /// When lowering \@llvm.memcpy this field specifies the maximum number of
+ /// store operations that may be substituted for a call to memcpy. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memcpy will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
+ /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
+ /// and one 1-byte store. This only applies to copying a constant array of
+ /// constant size.
+ unsigned MaxStoresPerMemcpy;
+
+ /// Maximum number of store operations that may be substituted for a call to
+ /// memcpy, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemcpyOptSize;
+ unsigned MaxLoadsPerMemcmp;
+ unsigned MaxLoadsPerMemcmpOptSize;
+
+ /// \brief Specify maximum number of store instructions per memmove call.
+ ///
+ /// When lowering \@llvm.memmove this field specifies the maximum number of
+ /// store instructions that may be substituted for a call to memmove. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memmove will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
+ /// with 8-bit alignment would result in nine 1-byte stores. This only
+ /// applies to copying a constant array of constant size.
+ unsigned MaxStoresPerMemmove;
+
+ /// Maximum number of store instructions that may be substituted for a call to
+ /// memmove, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemmoveOptSize;
+
+ /// Tells the code generator that select is more expensive than a branch if
+ /// the branch is usually predicted right.
+ bool PredictableSelectIsExpensive; + + /// \see enableExtLdPromotion. + bool EnableExtLdPromotion; + + /// Return true if the value types that can be represented by the specified + /// register class are all legal. + bool isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const; + + /// Replace/modify any TargetFrameIndex operands with a targte-dependent + /// sequence of memory operands that is recognized by PrologEpilogInserter. + MachineBasicBlock *emitPatchPoint(MachineInstr &MI, + MachineBasicBlock *MBB) const; +}; + +/// This class defines information used to lower LLVM code to legal SelectionDAG +/// operators that the target instruction selector can accept natively. +/// +/// This class also defines callbacks that targets must implement to lower +/// target-specific constructs to SelectionDAG operators. +class TargetLowering : public TargetLoweringBase { +public: + struct DAGCombinerInfo; + + TargetLowering(const TargetLowering &) = delete; + TargetLowering &operator=(const TargetLowering &) = delete; + + /// NOTE: The TargetMachine owns TLOF. + explicit TargetLowering(const TargetMachine &TM); + + bool isPositionIndependent() const; + + /// Returns true by value, base pointer and offset pointer and addressing mode + /// by reference if the node's address can be legally represented as + /// pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, + SDValue &/*Offset*/, + ISD::MemIndexedMode &/*AM*/, + SelectionDAG &/*DAG*/) const { + return false; + } + + /// Returns true by value, base pointer and offset pointer and addressing mode + /// by reference if this node can be combined with a load / store to form a + /// post-indexed load / store. + virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, + SDValue &/*Base*/, + SDValue &/*Offset*/, + ISD::MemIndexedMode &/*AM*/, + SelectionDAG &/*DAG*/) const { + return false; + } + + /// Return the entry encoding for a jump table in the current function. The + /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. + virtual unsigned getJumpTableEncoding() const; + + virtual const MCExpr * + LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, + const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, + MCContext &/*Ctx*/) const { + llvm_unreachable("Need to implement this hook if target has custom JTIs"); + } + + /// Returns relocation base for the given PIC jumptable. + virtual SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + + /// This returns the relocation base for the given PIC jumptable, the same as + /// getPICJumpTableRelocBase, but as an MCExpr. + virtual const MCExpr * + getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, MCContext &Ctx) const; + + /// Return true if folding a constant offset with the given GlobalAddress is + /// legal. It is frequently not legal in PIC relocation models. + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const; + + void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, + SDValue &NewRHS, ISD::CondCode &CCCode, + const SDLoc &DL) const; + + /// Returns a pair of (return value, chain). + /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
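+ ///
+ /// For illustration, a hypothetical caller lowering an f64 remainder might
+ /// use something like
+ /// \code
+ /// SDValue Ops[] = { LHS, RHS };
+ /// std::pair<SDValue, SDValue> CallInfo =
+ /// makeLibCall(DAG, RTLIB::REM_F64, MVT::f64, Ops, /*isSigned=*/false, dl);
+ /// // CallInfo.first is the return value, CallInfo.second the output chain.
+ /// \endcode
+ /// where LHS, RHS, and dl are placeholders supplied by the caller.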
+ std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + EVT RetVT, ArrayRef<SDValue> Ops, + bool isSigned, const SDLoc &dl, + bool doesNotReturn = false, + bool isReturnValueUsed = true) const; + + /// Check whether parameters to a call that are passed in callee saved + /// registers are the same as from the calling function. This needs to be + /// checked for tail call eligibility. + bool parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<SDValue> &OutVals) const; + + //===--------------------------------------------------------------------===// + // TargetLowering Optimization Methods + // + + /// A convenience struct that encapsulates a DAG, and two SDValues for + /// returning information from TargetLowering to its clients that want to + /// combine. + struct TargetLoweringOpt { + SelectionDAG &DAG; + bool LegalTys; + bool LegalOps; + SDValue Old; + SDValue New; + + explicit TargetLoweringOpt(SelectionDAG &InDAG, + bool LT, bool LO) : + DAG(InDAG), LegalTys(LT), LegalOps(LO) {} + + bool LegalTypes() const { return LegalTys; } + bool LegalOperations() const { return LegalOps; } + + bool CombineTo(SDValue O, SDValue N) { + Old = O; + New = N; + return true; + } + }; + + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + + /// Look at Op. At this point, we know that only the DemandedMask bits of the + /// result of Op are ever used downstream. If we can use this information to + /// simplify Op, create a new simplified DAG node and return true, returning + /// the original and new nodes in Old and New. Otherwise, analyze the + /// expression and return a mask of KnownOne and KnownZero bits for the + /// expression (used to simplify the caller). The KnownZero/One bits may only + /// be accurate for those bits in the DemandedMask. 
+ /// \p AssumeSingleUse When this parameter is true, this function will
+ /// attempt to simplify \p Op even if there are multiple uses.
+ /// Callers are responsible for correctly updating the DAG based on the
+ /// results of this function, because simply replacing TLO.Old
+ /// with TLO.New will be incorrect when this parameter is true and TLO.Old
+ /// has multiple uses.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
+
+ /// Helper wrapper around SimplifyDemandedBits.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ DAGCombinerInfo &DCI) const;
+
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
+ /// argument allows us to only collect the known bits that are shared by the
+ /// requested vector elements.
+ virtual void computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
+ /// Default implementation computes low bits based on alignment
+ /// information. This should preserve known bits passed into it.
+ virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// This method can be implemented by targets that want to expose additional
+ /// information about sign bits to the DAG Combiner. The DemandedElts
+ /// argument allows us to only collect the minimum sign bits that are shared
+ /// by the requested vector elements.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ struct DAGCombinerInfo {
+ void *DC; // The DAG Combiner object.
+ CombineLevel Level;
+ bool CalledByLegalizer;
+
+ public:
+ SelectionDAG &DAG;
+
+ DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
+ : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
+
+ bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
+ bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
+ bool isAfterLegalizeVectorOps() const {
+ return Level == AfterLegalizeDAG;
+ }
+ CombineLevel getDAGCombineLevel() { return Level; }
+ bool isCalledByLegalizer() const { return CalledByLegalizer; }
+
+ void AddToWorklist(SDNode *N);
+ SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
+
+ void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
+ };
+
+ /// Return true if \p N is a constant or constant vector equal to the true
+ /// value from getBooleanContents().
+ bool isConstTrueVal(const SDNode *N) const;
+
+ /// Return true if \p N is a constant or constant vector equal to the false
+ /// value from getBooleanContents().
+ bool isConstFalseVal(const SDNode *N) const;
+
+ /// Return a constant of type VT that contains a true value that respects
+ /// getBooleanContents().
+ SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
+
+ /// Return if \p N is a True value when extended to \p VT.
+ bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;
+
+ /// Try to simplify a setcc built with the specified operands and cc. If it is
+ /// unable to simplify it, return a null SDValue.
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ bool foldBooleans, DAGCombinerInfo &DCI,
+ const SDLoc &dl) const;
+
+ // For targets which wrap addresses, unwrap for analysis.
+ virtual SDValue unwrapAddress(SDValue N) const { return N; }
+
+ /// Returns true (and the GlobalValue and the offset) if the node is a
+ /// GlobalAddress + offset.
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ /// This method will be invoked for all target nodes and for any
+ /// target-independent nodes that the target has registered (via
+ /// setTargetDAGCombine) to be invoked for.
+ ///
+ /// The semantics are as follows:
+ /// Return Value:
+ /// SDValue.Val == 0 - No change was made
+ /// SDValue.Val == N - N was replaced, is dead, and is already handled.
+ /// otherwise - N should be replaced by the returned Operand.
+ ///
+ /// In addition, methods provided by DAGCombinerInfo may be used to perform
+ /// more complex transformations.
+ ///
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// Return true if it is profitable to move a following shift through this
+ /// node, adjusting any immediate operands as necessary to preserve semantics.
+ /// This transformation may not be desirable if it disrupts a particularly
+ /// auspicious target-specific tree (e.g. bitfield extraction in AArch64).
+ /// By default, it returns true.
+ virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
+ return true;
+ }
+
+ // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
+ // to a shuffle and a truncate.
+ // Example of such a combine:
+ // v4i32 build_vector((extract_elt V, 1),
+ // (extract_elt V, 3),
+ // (extract_elt V, 5),
+ // (extract_elt V, 7))
+ // -->
+ // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
+ virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
+ ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
+ return false;
+ }
+
+ /// Return true if the target has native support for the specified value type
+ /// and it is 'desirable' to use the type for the given node type. e.g. On x86
+ /// i16 is legal, but undesirable since i16 instruction encodings are longer
+ /// and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
+ // By default, assume all legal types are desirable.
+ return isTypeLegal(VT);
+ }
+
+ /// Return true if it is profitable for dag combiner to transform a floating
+ /// point op of specified opcode to an equivalent op of an integer
+ /// type. e.g. f32 load -> i32 load can be profitable on ARM.
+ virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
+ EVT /*VT*/) const {
+ return false;
+ }
+
+ /// This method queries the target whether it is beneficial for dag combiner
+ /// to promote the specified node. If true, it should return the desired
+ /// promotion type by reference.
+ virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
+ return false;
+ }
+
+ /// Return true if the target supports the swifterror attribute. It optimizes
+ /// loads and stores to reading and writing a specific register.
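+ ///
+ /// For illustration, the attribute appears on a pointer-to-pointer argument
+ /// such as
+ /// \code
+ /// declare swiftcc void @f(i8** swifterror)
+ /// \endcode
+ /// and targets returning true here keep such values in a specific register.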
+ virtual bool supportSwiftError() const { + return false; + } + + /// Return true if the target supports that a subset of CSRs for the given + /// machine function is handled explicitly via copies. + virtual bool supportSplitCSR(MachineFunction *MF) const { + return false; + } + + /// Perform necessary initialization to handle a subset of CSRs explicitly + /// via copies. This function is called at the beginning of instruction + /// selection. + virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { + llvm_unreachable("Not Implemented"); + } + + /// Insert explicit copies in entry and exit blocks. We copy a subset of + /// CSRs to virtual registers in the entry block, and copy them back to + /// physical registers in the exit blocks. This function is called at the end + /// of instruction selection. + virtual void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + llvm_unreachable("Not Implemented"); + } + + //===--------------------------------------------------------------------===// + // Lowering methods - These methods must be implemented by targets so that + // the SelectionDAGBuilder code knows how to lower these. + // + + /// This hook must be implemented to lower the incoming (formal) arguments, + /// described by the Ins array, into the specified DAG. The implementation + /// should fill in the InVals array with legal-type argument values, and + /// return the resulting token chain value. + virtual SDValue LowerFormalArguments( + SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, + const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, + SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { + llvm_unreachable("Not Implemented"); + } + + /// This structure contains all information that is necessary for lowering + /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder + /// needs to lower a call, and targets will see this struct in their LowerCall + /// implementation. + struct CallLoweringInfo { + SDValue Chain; + Type *RetTy = nullptr; + bool RetSExt : 1; + bool RetZExt : 1; + bool IsVarArg : 1; + bool IsInReg : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + bool IsConvergent : 1; + bool IsPatchPoint : 1; + + // IsTailCall should be modified by implementations of + // TargetLowering::LowerCall that perform tail call conversions. + bool IsTailCall = false; + + // Is Call lowering done post SelectionDAG type legalization. 
+ bool IsPostTypeLegalization = false; + + unsigned NumFixedArgs = -1; + CallingConv::ID CallConv = CallingConv::C; + SDValue Callee; + ArgListTy Args; + SelectionDAG &DAG; + SDLoc DL; + ImmutableCallSite CS; + SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; + SmallVector<ISD::InputArg, 32> Ins; + SmallVector<SDValue, 4> InVals; + + CallLoweringInfo(SelectionDAG &DAG) + : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), + DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), + IsPatchPoint(false), DAG(DAG) {} + + CallLoweringInfo &setDebugLoc(const SDLoc &dl) { + DL = dl; + return *this; + } + + CallLoweringInfo &setChain(SDValue InChain) { + Chain = InChain; + return *this; + } + + // setCallee with target/module-specific attributes + CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, + SDValue Target, ArgListTy &&ArgsList) { + RetTy = ResultType; + Callee = Target; + CallConv = CC; + NumFixedArgs = ArgsList.size(); + Args = std::move(ArgsList); + + DAG.getTargetLoweringInfo().markLibCallAttributes( + &(DAG.getMachineFunction()), CC, Args); + return *this; + } + + CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, + SDValue Target, ArgListTy &&ArgsList) { + RetTy = ResultType; + Callee = Target; + CallConv = CC; + NumFixedArgs = ArgsList.size(); + Args = std::move(ArgsList); + return *this; + } + + CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, + SDValue Target, ArgListTy &&ArgsList, + ImmutableCallSite Call) { + RetTy = ResultType; + + IsInReg = Call.hasRetAttr(Attribute::InReg); + DoesNotReturn = + Call.doesNotReturn() || + (!Call.isInvoke() && + isa<UnreachableInst>(Call.getInstruction()->getNextNode())); + IsVarArg = FTy->isVarArg(); + IsReturnValueUsed = !Call.getInstruction()->use_empty(); + RetSExt = Call.hasRetAttr(Attribute::SExt); + RetZExt = Call.hasRetAttr(Attribute::ZExt); + + Callee = Target; + + CallConv = Call.getCallingConv(); + NumFixedArgs = FTy->getNumParams(); + Args = std::move(ArgsList); + + CS = Call; + + return *this; + } + + CallLoweringInfo &setInRegister(bool Value = true) { + IsInReg = Value; + return *this; + } + + CallLoweringInfo &setNoReturn(bool Value = true) { + DoesNotReturn = Value; + return *this; + } + + CallLoweringInfo &setVarArg(bool Value = true) { + IsVarArg = Value; + return *this; + } + + CallLoweringInfo &setTailCall(bool Value = true) { + IsTailCall = Value; + return *this; + } + + CallLoweringInfo &setDiscardResult(bool Value = true) { + IsReturnValueUsed = !Value; + return *this; + } + + CallLoweringInfo &setConvergent(bool Value = true) { + IsConvergent = Value; + return *this; + } + + CallLoweringInfo &setSExtResult(bool Value = true) { + RetSExt = Value; + return *this; + } + + CallLoweringInfo &setZExtResult(bool Value = true) { + RetZExt = Value; + return *this; + } + + CallLoweringInfo &setIsPatchPoint(bool Value = true) { + IsPatchPoint = Value; + return *this; + } + + CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { + IsPostTypeLegalization = Value; + return *this; + } + + ArgListTy &getArgs() { + return Args; + } + }; + + /// This function lowers an abstract call to a function into an actual call. + /// This returns a pair of operands. The first element is the return value + /// for the function (if RetTy is not VoidTy). The second element is the + /// outgoing token chain. It calls LowerCall to do the actual lowering. 
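+ ///
+ /// For illustration, a hypothetical backend emitting a runtime call might
+ /// drive it roughly as follows (Chain, Callee, RetTy, Args, and dl are
+ /// placeholders provided by the caller):
+ /// \code
+ /// TargetLowering::CallLoweringInfo CLI(DAG);
+ /// CLI.setDebugLoc(dl)
+ /// .setChain(Chain)
+ /// .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
+ /// std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);
+ /// // Result.first is the call's return value, Result.second the chain.
+ /// \endcode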
+ std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
+
+ /// This hook must be implemented to lower calls into the specified
+ /// DAG. The outgoing arguments to the call are described by the Outs array,
+ /// and the values to be returned by the call are described by the Ins
+ /// array. The implementation should fill in the InVals array with legal-type
+ /// return values from the call, and return the resulting token chain value.
+ virtual SDValue
+ LowerCall(CallLoweringInfo &/*CLI*/,
+ SmallVectorImpl<SDValue> &/*InVals*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// Target-specific cleanup for formal ByVal parameters.
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
+
+ /// This hook should be implemented to check whether the return values
+ /// described by the Outs array can fit into the return registers. If false
+ /// is returned, an sret-demotion is performed.
+ virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
+ MachineFunction &/*MF*/, bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
+ LLVMContext &/*Context*/) const
+ {
+ // Return true by default to get preexisting behavior.
+ return true;
+ }
+
+ /// This hook must be implemented to lower outgoing return values, described
+ /// by the Outs array, into the specified DAG. The implementation should
+ /// return the resulting token chain value.
+ virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
+ bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
+ const SmallVectorImpl<SDValue> & /*OutVals*/,
+ const SDLoc & /*dl*/,
+ SelectionDAG & /*DAG*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// Return true if the result of the specified node is used by a return node
+ /// only. It also computes and returns the input chain for the tail call.
+ ///
+ /// This is used to determine whether it is possible to codegen a libcall as
+ /// a tail call at legalization time.
+ virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
+ return false;
+ }
+
+ /// Return true if the target may be able to emit the call instruction as a
+ /// tail call. This is used by optimization passes to determine if it's
+ /// profitable to duplicate return instructions to enable tailcall
+ /// optimization.
+ virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
+ return false;
+ }
+
+ /// Return the builtin name for the __builtin___clear_cache intrinsic.
+ /// Default is to invoke the clear cache library call.
+ virtual const char * getClearCacheBuiltinName() const {
+ return "__clear_cache";
+ }
+
+ /// Return the register ID of the name passed in. Used by the named register
+ /// global variables extension. There is no target-independent behaviour
+ /// so the default action is to bail.
+ virtual unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ report_fatal_error("Named registers not implemented for this target");
+ }
+
+ /// Return the type that should be used to zero or sign extend a
+ /// zeroext/signext integer return value. FIXME: Some C calling conventions
+ /// require the return type to be promoted, but this is not true all the time,
+ /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
+ /// conventions. The frontend should handle this and include all of the
+ /// necessary information.
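+ ///
+ /// For illustration, with the default implementation below, a zeroext i8
+ /// return value on a typical 32-bit target is widened to i32, since i8 is
+ /// smaller than the register type chosen for MVT::i32.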
+ virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType /*ExtendKind*/) const { + EVT MinVT = getRegisterType(Context, MVT::i32); + return VT.bitsLT(MinVT) ? MinVT : VT; + } + + /// For some targets, an LLVM struct type must be broken down into multiple + /// simple types, but the calling convention specifies that the entire struct + /// must be passed in a block of consecutive registers. + virtual bool + functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, + bool isVarArg) const { + return false; + } + + /// Returns a 0 terminated array of registers that can be safely used as + /// scratch registers. + virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { + return nullptr; + } + + /// This callback is used to prepare for a volatile or atomic load. + /// It takes a chain node as input and returns the chain for the load itself. + /// + /// Having a callback like this is necessary for targets like SystemZ, + /// which allows a CPU to reuse the result of a previous load indefinitely, + /// even if a cache-coherent store is performed by another CPU. The default + /// implementation does nothing. + virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, + SelectionDAG &DAG) const { + return Chain; + } + + /// This callback is used to inspect load/store instructions and add + /// target-specific MachineMemOperand flags to them. The default + /// implementation does nothing. + virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { + return MachineMemOperand::MONone; + } + + /// This callback is invoked by the type legalizer to legalize nodes with an + /// illegal operand type but legal result types. It replaces the + /// LowerOperation callback in the type Legalizer. The reason we can not do + /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to + /// use this callback. + /// + /// TODO: Consider merging with ReplaceNodeResults. + /// + /// The target places new result values for the node in Results (their number + /// and types must exactly match those of the original return values of + /// the node), or leaves Results empty, which indicates that the node is not + /// to be custom lowered after all. + /// The default implementation calls LowerOperation. + virtual void LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; + + /// This callback is invoked for operations that are unsupported by the + /// target, which are registered to use 'custom' lowering, and whose defined + /// values are all legal. If the target has no operations that require custom + /// lowering, it need not implement this. The default implementation of this + /// aborts. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + /// This callback is invoked when a node result type is illegal for the + /// target, and the operation was registered to use 'custom' lowering for that + /// result type. The target places new result values for the node in Results + /// (their number and types must exactly match those of the original return + /// values of the node), or leaves Results empty, which indicates that the + /// node is not to be custom lowered after all. + /// + /// If the target has no operations that require custom lowering, it need not + /// implement this. The default implementation aborts. 
+ virtual void ReplaceNodeResults(SDNode * /*N*/, + SmallVectorImpl<SDValue> &/*Results*/, + SelectionDAG &/*DAG*/) const { + llvm_unreachable("ReplaceNodeResults not implemented for this target!"); + } + + /// This method returns the name of a target specific DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// This method returns a target specific FastISel object, or null if the + /// target does not support "fast" ISel. + virtual FastISel *createFastISel(FunctionLoweringInfo &, + const TargetLibraryInfo *) const { + return nullptr; + } + + bool verifyReturnAddressArgumentIsConstant(SDValue Op, + SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Inline Asm Support hooks + // + + /// This hook allows the target to expand an inline asm call to be explicit + /// llvm code if it wants to. This is useful for turning simple inline asms + /// into LLVM intrinsics, which gives the compiler more information about the + /// behavior of the code. + virtual bool ExpandInlineAsm(CallInst *) const { + return false; + } + + enum ConstraintType { + C_Register, // Constraint represents specific register(s). + C_RegisterClass, // Constraint represents any of register(s) in class. + C_Memory, // Memory constraint. + C_Other, // Something else. + C_Unknown // Unsupported constraint. + }; + + enum ConstraintWeight { + // Generic weights. + CW_Invalid = -1, // No match. + CW_Okay = 0, // Acceptable. + CW_Good = 1, // Good weight. + CW_Better = 2, // Better weight. + CW_Best = 3, // Best weight. + + // Well-known weights. + CW_SpecificReg = CW_Okay, // Specific register operands. + CW_Register = CW_Good, // Register operands. + CW_Memory = CW_Better, // Memory operands. + CW_Constant = CW_Best, // Constant operand. + CW_Default = CW_Okay // Default or don't know type. + }; + + /// This contains information for each constraint that we are lowering. + struct AsmOperandInfo : public InlineAsm::ConstraintInfo { + /// This contains the actual string for the code, like "m". TargetLowering + /// picks the 'best' code from ConstraintInfo::Codes that most closely + /// matches the operand. + std::string ConstraintCode; + + /// Information about the constraint code, e.g. Register, RegisterClass, + /// Memory, Other, Unknown. + TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; + + /// If this is the result output operand or a clobber, this is null, + /// otherwise it is the incoming operand to the CallInst. This gets + /// modified as the asm is processed. + Value *CallOperandVal = nullptr; + + /// The ValueType for the operand value. + MVT ConstraintVT = MVT::Other; + + /// Copy constructor for copying from a ConstraintInfo. + AsmOperandInfo(InlineAsm::ConstraintInfo Info) + : InlineAsm::ConstraintInfo(std::move(Info)) {} + + /// Return true of this is an input operand that is a matching constraint + /// like "4". + bool isMatchingInputConstraint() const; + + /// If this is an input matching constraint, this method returns the output + /// operand it matches. + unsigned getMatchedOperand() const; + }; + + using AsmOperandInfoVector = std::vector<AsmOperandInfo>; + + /// Split up the constraint string from the inline assembly value into the + /// specific constraints and their prefixes, and also tie in the associated + /// operand values. If this returns an empty vector, and if the constraint + /// string itself isn't empty, there was an error parsing. 
+ virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const; + + /// Examine constraint type and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + /// Determines the constraint code and constraint type to use for the specific + /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. + /// If the actual operand being passed in is available, it can be passed in as + /// Op, otherwise an empty SDValue can be passed. + virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + SelectionDAG *DAG = nullptr) const; + + /// Given a constraint, return the type of constraint it is for this target. + virtual ConstraintType getConstraintType(StringRef Constraint) const; + + /// Given a physical register constraint (e.g. {edx}), return the register + /// number and the register class for the register. + /// + /// Given a register class constraint, like 'r', if this corresponds directly + /// to an LLVM register class, return a register of 0 and the register class + /// pointer. + /// + /// This should only be used for C_Register constraints. On error, this + /// returns a register number of 0 and a null register class pointer. + virtual std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const; + + virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { + if (ConstraintCode == "i") + return InlineAsm::Constraint_i; + else if (ConstraintCode == "m") + return InlineAsm::Constraint_m; + return InlineAsm::Constraint_Unknown; + } + + /// Try to replace an X constraint, which matches anything, with another that + /// has more specific requirements based on the type of the corresponding + /// operand. This returns null if there is no replacement to make. + virtual const char *LowerXConstraint(EVT ConstraintVT) const; + + /// Lower the specified operand into the Ops vector. If it is invalid, don't + /// add anything to Ops. + virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Div utility functions + // + SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + bool IsAfterLegalization, + std::vector<SDNode *> *Created) const; + SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + bool IsAfterLegalization, + std::vector<SDNode *> *Created) const; + + /// Targets may override this function to provide custom SDIV lowering for + /// power-of-2 denominators. If the target returns an empty SDValue, LLVM + /// assumes SDIV is expensive and replaces it with a series of other integer + /// operations. + virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const; + + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. 
If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. + virtual unsigned combineRepeatedFPDivisors() const { + return 0; + } + + /// Hooks for building estimates in place of slower divisions and square + /// roots. + + /// Return either a square root or its reciprocal estimate value for the input + /// operand. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// The boolean UseOneConstNR output is used to select a Newton-Raphson + /// algorithm implementation that uses either one or two constants. + /// The boolean Reciprocal is used to select whether the estimate is for the + /// square root of the input operand or the reciprocal of its square root. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. + /// An empty SDValue return means no estimate sequence can be created. + virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, + bool &UseOneConstNR, bool Reciprocal) const { + return SDValue(); + } + + /// Return a reciprocal estimate value for the input operand. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. + /// An empty SDValue return means no estimate sequence can be created. + virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps) const { + return SDValue(); + } + + //===--------------------------------------------------------------------===// + // Legalization utility functions + // + + /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, + /// respectively, each computing an n/2-bit part of the result. + /// \param Result A vector that will be filled with the parts of the result + /// in little-endian order. + /// \param LL Low bits of the LHS of the MUL. You can use this parameter + /// if you want to control how low bits are extracted from the LHS. + /// \param LH High bits of the LHS of the MUL. See LL for meaning. + /// \param RL Low bits of the RHS of the MUL. See LL for meaning + /// \param RH High bits of the RHS of the MUL. See LL for meaning. + /// \returns true if the node has been expanded, false if it has not + bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, + SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL = SDValue(), SDValue LH = SDValue(), + SDValue RL = SDValue(), SDValue RH = SDValue()) const; + + /// Expand a MUL into two nodes. 
One that computes the high bits of + /// the result and one that computes the low bits. + /// \param HiLoVT The value type to use for the Lo and Hi nodes. + /// \param LL Low bits of the LHS of the MUL. You can use this parameter + /// if you want to control how low bits are extracted from the LHS. + /// \param LH High bits of the LHS of the MUL. See LL for meaning. + /// \param RL Low bits of the RHS of the MUL. See LL for meaning + /// \param RH High bits of the RHS of the MUL. See LL for meaning. + /// \returns true if the node has been expanded. false if it has not + bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL = SDValue(), SDValue LH = SDValue(), + SDValue RL = SDValue(), SDValue RH = SDValue()) const; + + /// Expand float(f32) to SINT(i64) conversion + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Turn load of vector type into a load of the individual elements. + /// \param LD load to expand + /// \returns MERGE_VALUEs of the scalar loads with their chains. + SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const; + + // Turn a store of a vector type into stores of the individual elements. + /// \param ST Store with a vector value type + /// \returns MERGE_VALUs of the individual store chains. + SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; + + /// Expands an unaligned load to 2 half-size loads for an integer, and + /// possibly more for vectors. + std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, + SelectionDAG &DAG) const; + + /// Expands an unaligned store to 2 half-size stores for integer values, and + /// possibly more for vectors. + SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; + + /// Increments memory address \p Addr according to the type of the value + /// \p DataVT that should be stored. If the data is stored in compressed + /// form, the memory address should be incremented according to the number of + /// the stored elements. This number is equal to the number of '1's bits + /// in the \p Mask. + /// \p DataVT is a vector type. \p Mask is a vector value. + /// \p DataVT and \p Mask have the same number of vector elements. + SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, + EVT DataVT, SelectionDAG &DAG, + bool IsCompressedMemory) const; + + /// Get a pointer to vector element \p Idx located in memory for a vector of + /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of + /// bounds the returned pointer is unspecified, but will be within the vector + /// bounds. + SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, + SDValue Idx) const; + + //===--------------------------------------------------------------------===// + // Instruction Emitting Hooks + // + + /// This method should be implemented by targets that mark instructions with + /// the 'usesCustomInserter' flag. These instructions are special in various + /// ways, which require special support to insert. The specified MachineInstr + /// is created but not inserted into any basic blocks, and this method is + /// called to expand it into a sequence of instructions, potentially also + /// creating new basic blocks and control flow. 
+ /// As long as the returned basic block is different (i.e., we created a new + /// one), the custom inserter is free to modify the rest of \p MBB. + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; + + /// This method should be implemented by targets that mark instructions with + /// the 'hasPostISelHook' flag. These instructions must be adjusted after + /// instruction selection by target hooks. e.g. To fill in optional defs for + /// ARM 's' setting instructions. + virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const; + + /// If this function returns true, SelectionDAGBuilder emits a + /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. + virtual bool useLoadStackGuardNode() const { + return false; + } + + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + llvm_unreachable("not implemented for this target"); + } + + /// Lower TLS global address SDNode for target independent emulated TLS model. + virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) + // If we're comparing for equality to zero and isCtlzFast is true, expose the + // fact that this can be implemented as a ctlz/srl pair, so that the dag + // combiner can fold the new nodes. + SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + +private: + SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const; +}; + +/// Given an LLVM IR type and return type attributes, compute the return value +/// EVTs and flags, and optionally also the offsets, if the return value is +/// being lowered to memory. +void GetReturnInfo(Type *ReturnType, AttributeList attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, const DataLayout &DL); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETLOWERING_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFile.h b/include/llvm/CodeGen/TargetLoweringObjectFile.h new file mode 100644 index 0000000000000..fe77c2954129f --- /dev/null +++ b/include/llvm/CodeGen/TargetLoweringObjectFile.h @@ -0,0 +1,194 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFile.h - Object Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H +#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/SectionKind.h" +#include <cstdint> + +namespace llvm { + +class GlobalValue; +class MachineModuleInfo; +class Mangler; +class MCContext; +class MCExpr; +class MCSection; +class MCSymbol; +class MCSymbolRefExpr; +class MCStreamer; +class MCValue; +class TargetMachine; + +class TargetLoweringObjectFile : public MCObjectFileInfo { + MCContext *Ctx = nullptr; + + /// Name-mangler for global names. 
+ Mangler *Mang = nullptr; + +protected: + bool SupportIndirectSymViaGOTPCRel = false; + bool SupportGOTPCRelWithOffset = true; + + /// This section contains the static constructor pointer list. + MCSection *StaticCtorSection = nullptr; + + /// This section contains the static destructor pointer list. + MCSection *StaticDtorSection = nullptr; + +public: + TargetLoweringObjectFile() = default; + TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete; + TargetLoweringObjectFile & + operator=(const TargetLoweringObjectFile &) = delete; + virtual ~TargetLoweringObjectFile(); + + MCContext &getContext() const { return *Ctx; } + Mangler &getMangler() const { return *Mang; } + + /// This method must be called before any actual lowering is done. This + /// specifies the current context for codegen, and gives the lowering + /// implementations a chance to set up their default sections. + virtual void Initialize(MCContext &ctx, const TargetMachine &TM); + + virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, + const MCSymbol *Sym) const; + + /// Emit the module-level metadata that the platform cares about. + virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const {} + + /// Given a constant with the SectionKind, return a section that it should be + /// placed in. + virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, + const Constant *C, + unsigned &Align) const; + + /// Classify the specified global variable into a set of target independent + /// categories embodied in SectionKind. + static SectionKind getKindForGlobal(const GlobalObject *GO, + const TargetMachine &TM); + + /// This method computes the appropriate section to emit the specified global + /// variable or function definition. This should not be passed external (or + /// available externally) globals. + MCSection *SectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const; + + /// This method computes the appropriate section to emit the specified global + /// variable or function definition. This should not be passed external (or + /// available externally) globals. + MCSection *SectionForGlobal(const GlobalObject *GO, + const TargetMachine &TM) const { + return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM); + } + + virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName, + const GlobalValue *GV, + const TargetMachine &TM) const; + + virtual MCSection *getSectionForJumpTable(const Function &F, + const TargetMachine &TM) const; + + virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const; + + /// Targets should implement this method to assign a section to globals with + /// an explicit section specfied. The implementation of this method can + /// assume that GO->hasSection() is true. + virtual MCSection * + getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const = 0; + + /// Return an MCExpr to use for a reference to the specified global variable + /// from exception handling information. + virtual const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, + unsigned Encoding, + const TargetMachine &TM, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const; + + /// Return the MCSymbol for a private symbol with global value name as its + /// base, with the specified suffix. 
+ MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix, + const TargetMachine &TM) const; + + // The symbol that gets passed to .cfi_personality. + virtual MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, + const TargetMachine &TM, + MachineModuleInfo *MMI) const; + + const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, + MCStreamer &Streamer) const; + + virtual MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const { + return StaticCtorSection; + } + + virtual MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const { + return StaticDtorSection; + } + + /// \brief Create a symbol reference to describe the given TLS variable when + /// emitting the address in debug info. + virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; + + virtual const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const { + return nullptr; + } + + /// \brief Target supports replacing a data "PC"-relative access to a symbol + /// through another symbol, by accessing the later via a GOT entry instead? + bool supportIndirectSymViaGOTPCRel() const { + return SupportIndirectSymViaGOTPCRel; + } + + /// \brief Target GOT "PC"-relative relocation supports encoding an additional + /// binary expression with an offset? + bool supportGOTPCRelWithOffset() const { + return SupportGOTPCRelWithOffset; + } + + /// \brief Get the target specific PC relative GOT entry relocation + virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCValue &MV, + int64_t Offset, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + return nullptr; + } + + virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, + const GlobalValue *GV) const {} + +protected: + virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, + SectionKind Kind, + const TargetMachine &TM) const = 0; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index e4d3cc9cecfcc..69de9f8cb35da 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -182,6 +182,10 @@ public: const Function &F) const override; void InitializeWasm(); + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; const MCExpr *lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS, diff --git a/include/llvm/CodeGen/TargetOpcodes.def b/include/llvm/CodeGen/TargetOpcodes.def new file mode 100644 index 0000000000000..d3e8483798a7f --- /dev/null +++ b/include/llvm/CodeGen/TargetOpcodes.def @@ -0,0 +1,461 @@ +//===-- llvm/CodeGen/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the target independent instruction opcodes. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +/// HANDLE_TARGET_OPCODE defines an opcode and its associated enum value. +/// +#ifndef HANDLE_TARGET_OPCODE +#define HANDLE_TARGET_OPCODE(OPC, NUM) +#endif + +/// HANDLE_TARGET_OPCODE_MARKER defines an alternative identifier for an opcode. +/// +#ifndef HANDLE_TARGET_OPCODE_MARKER +#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) +#endif + +/// Every instruction defined here must also appear in Target.td. +/// +HANDLE_TARGET_OPCODE(PHI) +HANDLE_TARGET_OPCODE(INLINEASM) +HANDLE_TARGET_OPCODE(CFI_INSTRUCTION) +HANDLE_TARGET_OPCODE(EH_LABEL) +HANDLE_TARGET_OPCODE(GC_LABEL) +HANDLE_TARGET_OPCODE(ANNOTATION_LABEL) + +/// KILL - This instruction is a noop that is used only to adjust the +/// liveness of registers. This can be useful when dealing with +/// sub-registers. +HANDLE_TARGET_OPCODE(KILL) + +/// EXTRACT_SUBREG - This instruction takes two operands: a register +/// that has subregisters, and a subregister index. It returns the +/// extracted subregister value. This is commonly used to implement +/// truncation operations on target architectures which support it. +HANDLE_TARGET_OPCODE(EXTRACT_SUBREG) + +/// INSERT_SUBREG - This instruction takes three operands: a register that +/// has subregisters, a register providing an insert value, and a +/// subregister index. It returns the value of the first register with the +/// value of the second register inserted. The first register is often +/// defined by an IMPLICIT_DEF, because it is commonly used to implement +/// anyext operations on target architectures which support it. +HANDLE_TARGET_OPCODE(INSERT_SUBREG) + +/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. +HANDLE_TARGET_OPCODE(IMPLICIT_DEF) + +/// SUBREG_TO_REG - Assert the value of bits in a super register. +/// The result of this instruction is the value of the second operand inserted +/// into the subregister specified by the third operand. All other bits are +/// assumed to be equal to the bits in the immediate integer constant in the +/// first operand. This instruction just communicates information; No code +/// should be generated. +/// This is typically used after an instruction where the write to a subregister +/// implicitly cleared the bits in the super registers. +HANDLE_TARGET_OPCODE(SUBREG_TO_REG) + +/// COPY_TO_REGCLASS - This instruction is a placeholder for a plain +/// register-to-register copy into a specific register class. This is only +/// used between instruction selection and MachineInstr creation, before +/// virtual registers have been created for all the instructions, and it's +/// only needed in cases where the register classes implied by the +/// instructions are insufficient. It is emitted as a COPY MachineInstr. + HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) + +/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic +HANDLE_TARGET_OPCODE(DBG_VALUE) + +/// REG_SEQUENCE - This variadic instruction is used to form a register that +/// represents a consecutive sequence of sub-registers. It's used as a +/// register coalescing / allocation aid and must be eliminated before code +/// emission. +// In SDNode form, the first operand encodes the register class created by +// the REG_SEQUENCE, while each subsequent pair names a vreg + subreg index +// pair. 
Once it has been lowered to a MachineInstr, the regclass operand +// is no longer present. +/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 +/// After register coalescing references of v1024 should be replace with +/// v1027:3, v1025 with v1027:4, etc. + HANDLE_TARGET_OPCODE(REG_SEQUENCE) + +/// COPY - Target-independent register copy. This instruction can also be +/// used to copy between subregisters of virtual registers. + HANDLE_TARGET_OPCODE(COPY) + +/// BUNDLE - This instruction represents an instruction bundle. Instructions +/// which immediately follow a BUNDLE instruction which are marked with +/// 'InsideBundle' flag are inside the bundle. +HANDLE_TARGET_OPCODE(BUNDLE) + +/// Lifetime markers. +HANDLE_TARGET_OPCODE(LIFETIME_START) +HANDLE_TARGET_OPCODE(LIFETIME_END) + +/// A Stackmap instruction captures the location of live variables at its +/// position in the instruction stream. It is followed by a shadow of bytes +/// that must lie within the function and not contain another stackmap. +HANDLE_TARGET_OPCODE(STACKMAP) + +/// FEntry all - This is a marker instruction which gets translated into a raw fentry call. +HANDLE_TARGET_OPCODE(FENTRY_CALL) + +/// Patchable call instruction - this instruction represents a call to a +/// constant address, followed by a series of NOPs. It is intended to +/// support optimizations for dynamic languages (such as javascript) that +/// rewrite calls to runtimes with more efficient code sequences. +/// This also implies a stack map. +HANDLE_TARGET_OPCODE(PATCHPOINT) + +/// This pseudo-instruction loads the stack guard value. Targets which need +/// to prevent the stack guard value or address from being spilled to the +/// stack should override TargetLowering::emitLoadStackGuardNode and +/// additionally expand this pseudo after register allocation. +HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) + +/// Call instruction with associated vm state for deoptimization and list +/// of live pointers for relocation by the garbage collector. It is +/// intended to support garbage collection with fully precise relocating +/// collectors and deoptimizations in either the callee or caller. +HANDLE_TARGET_OPCODE(STATEPOINT) + +/// Instruction that records the offset of a local stack allocation passed to +/// llvm.localescape. It has two arguments: the symbol for the label and the +/// frame index of the local stack allocation. +HANDLE_TARGET_OPCODE(LOCAL_ESCAPE) + +/// Wraps a machine instruction which can fault, bundled with associated +/// information on how to handle such a fault. +/// For example loading instruction that may page fault, bundled with associated +/// information on how to handle such a page fault. It is intended to support +/// "zero cost" null checks in managed languages by allowing LLVM to fold +/// comparisons into existing memory operations. +HANDLE_TARGET_OPCODE(FAULTING_OP) + +/// Wraps a machine instruction to add patchability constraints. An +/// instruction wrapped in PATCHABLE_OP has to either have a minimum +/// size or be preceded with a nop of that size. The first operand is +/// an immediate denoting the minimum size of the instruction, the +/// second operand is an immediate denoting the opcode of the original +/// instruction. The rest of the operands are the operands of the +/// original instruction. +HANDLE_TARGET_OPCODE(PATCHABLE_OP) + +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. 
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER) + +/// Wraps a return instruction and its operands to enable adding nop sleds +/// either before or after the return. The nop sleds are useful for inserting +/// instrumentation instructions at runtime. +/// The patch here replaces the return instruction. +HANDLE_TARGET_OPCODE(PATCHABLE_RET) + +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. +/// The patch here prepends the return instruction. +/// The same thing as in x86_64 is not possible for ARM because it has multiple +/// return instructions. Furthermore, CPU allows parametrized and even +/// conditional return instructions. In the current ARM implementation we are +/// making use of the fact that currently LLVM doesn't seem to generate +/// conditional return instructions. +/// On ARM, the same instruction can be used for popping multiple registers +/// from the stack and returning (it just pops pc register too), and LLVM +/// generates it sometimes. So we can't insert the sled between this stack +/// adjustment and the return without splitting the original instruction into 2 +/// instructions. So on ARM, rather than jumping into the exit trampoline, we +/// call it, it does the tracing, preserves the stack and returns. +HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) + +/// Wraps a tail call instruction and its operands to enable adding nop sleds +/// either before or after the tail exit. We use this as a disambiguation from +/// PATCHABLE_RET which specifically only works for return instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) + +/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be +/// patched to insert instrumentation instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) + +/// The following generic opcodes are not supposed to appear after ISel. +/// This is something we might want to relax, but for now, this is convenient +/// to produce diagnostics. + +/// Generic ADD instruction. This is an integer add. +HANDLE_TARGET_OPCODE(G_ADD) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) + +/// Generic SUB instruction. This is an integer sub. +HANDLE_TARGET_OPCODE(G_SUB) + +// Generic multiply instruction. +HANDLE_TARGET_OPCODE(G_MUL) + +// Generic signed division instruction. +HANDLE_TARGET_OPCODE(G_SDIV) + +// Generic unsigned division instruction. +HANDLE_TARGET_OPCODE(G_UDIV) + +// Generic signed remainder instruction. +HANDLE_TARGET_OPCODE(G_SREM) + +// Generic unsigned remainder instruction. +HANDLE_TARGET_OPCODE(G_UREM) + +/// Generic bitwise and instruction. +HANDLE_TARGET_OPCODE(G_AND) + +/// Generic bitwise or instruction. +HANDLE_TARGET_OPCODE(G_OR) + +/// Generic bitwise exclusive-or instruction. +HANDLE_TARGET_OPCODE(G_XOR) + + +HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF) + +/// Generic PHI instruction with types. +HANDLE_TARGET_OPCODE(G_PHI) + +/// Generic instruction to materialize the address of an alloca or other +/// stack-based object. +HANDLE_TARGET_OPCODE(G_FRAME_INDEX) + +/// Generic reference to global value. +HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE) + +/// Generic instruction to extract blocks of bits from the register given +/// (typically a sub-register COPY after instruction selection). +HANDLE_TARGET_OPCODE(G_EXTRACT) + +HANDLE_TARGET_OPCODE(G_UNMERGE_VALUES) + +/// Generic instruction to insert blocks of bits from the registers given into +/// the source. 
+HANDLE_TARGET_OPCODE(G_INSERT) + +/// Generic instruction to paste a variable number of components together into a +/// larger register. +HANDLE_TARGET_OPCODE(G_MERGE_VALUES) + +/// Generic pointer to int conversion. +HANDLE_TARGET_OPCODE(G_PTRTOINT) + +/// Generic int to pointer conversion. +HANDLE_TARGET_OPCODE(G_INTTOPTR) + +/// Generic bitcast. The source and destination types must be different, or a +/// COPY is the relevant instruction. +HANDLE_TARGET_OPCODE(G_BITCAST) + +/// Generic load. +HANDLE_TARGET_OPCODE(G_LOAD) + +/// Generic store. +HANDLE_TARGET_OPCODE(G_STORE) + +/// Generic atomic cmpxchg with internal success check. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + +/// Generic atomic cmpxchg. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG) + +/// Generic atomicrmw. +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XCHG) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_ADD) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_SUB) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_AND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_NAND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_OR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XOR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) + +/// Generic conditional branch instruction. +HANDLE_TARGET_OPCODE(G_BRCOND) + +/// Generic indirect branch instruction. +HANDLE_TARGET_OPCODE(G_BRINDIRECT) + +/// Generic intrinsic use (without side effects). +HANDLE_TARGET_OPCODE(G_INTRINSIC) + +/// Generic intrinsic use (with side effects). +HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS) + +/// Generic extension allowing rubbish in high bits. +HANDLE_TARGET_OPCODE(G_ANYEXT) + +/// Generic instruction to discard the high bits of a register. This differs +/// from (G_EXTRACT val, 0) on its action on vectors: G_TRUNC will truncate +/// each element individually, G_EXTRACT will typically discard the high +/// elements of the vector. +HANDLE_TARGET_OPCODE(G_TRUNC) + +/// Generic integer constant. +HANDLE_TARGET_OPCODE(G_CONSTANT) + +/// Generic floating constant. +HANDLE_TARGET_OPCODE(G_FCONSTANT) + +/// Generic va_start instruction. Stores to its one pointer operand. +HANDLE_TARGET_OPCODE(G_VASTART) + +/// Generic va_start instruction. Stores to its one pointer operand. +HANDLE_TARGET_OPCODE(G_VAARG) + +// Generic sign extend +HANDLE_TARGET_OPCODE(G_SEXT) + +// Generic zero extend +HANDLE_TARGET_OPCODE(G_ZEXT) + +// Generic left-shift +HANDLE_TARGET_OPCODE(G_SHL) + +// Generic logical right-shift +HANDLE_TARGET_OPCODE(G_LSHR) + +// Generic arithmetic right-shift +HANDLE_TARGET_OPCODE(G_ASHR) + +/// Generic integer-base comparison, also applicable to vectors of integers. +HANDLE_TARGET_OPCODE(G_ICMP) + +/// Generic floating-point comparison, also applicable to vectors. +HANDLE_TARGET_OPCODE(G_FCMP) + +/// Generic select. +HANDLE_TARGET_OPCODE(G_SELECT) + +/// Generic unsigned add instruction, consuming the normal operands plus a carry +/// flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_UADDE) + +/// Generic unsigned subtract instruction, consuming the normal operands plus a +/// carry flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_USUBE) + +/// Generic signed add instruction, producing the result and a signed overflow +/// flag. +HANDLE_TARGET_OPCODE(G_SADDO) + +/// Generic signed subtract instruction, producing the result and a signed +/// overflow flag. 
+HANDLE_TARGET_OPCODE(G_SSUBO) + +/// Generic unsigned multiply instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_UMULO) + +/// Generic signed multiply instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_SMULO) + +// Multiply two numbers at twice the incoming bit width (unsigned) and return +// the high half of the result. +HANDLE_TARGET_OPCODE(G_UMULH) + +// Multiply two numbers at twice the incoming bit width (signed) and return +// the high half of the result. +HANDLE_TARGET_OPCODE(G_SMULH) + +/// Generic FP addition. +HANDLE_TARGET_OPCODE(G_FADD) + +/// Generic FP subtraction. +HANDLE_TARGET_OPCODE(G_FSUB) + +/// Generic FP multiplication. +HANDLE_TARGET_OPCODE(G_FMUL) + +/// Generic FMA multiplication. Behaves like llvm fma intrinsic +HANDLE_TARGET_OPCODE(G_FMA) + +/// Generic FP division. +HANDLE_TARGET_OPCODE(G_FDIV) + +/// Generic FP remainder. +HANDLE_TARGET_OPCODE(G_FREM) + +/// Generic FP exponentiation. +HANDLE_TARGET_OPCODE(G_FPOW) + +/// Generic base-e exponential of a value. +HANDLE_TARGET_OPCODE(G_FEXP) + +/// Generic base-2 exponential of a value. +HANDLE_TARGET_OPCODE(G_FEXP2) + +/// Floating point base-e logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG) + +/// Floating point base-2 logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG2) + +/// Generic FP negation. +HANDLE_TARGET_OPCODE(G_FNEG) + +/// Generic FP extension. +HANDLE_TARGET_OPCODE(G_FPEXT) + +/// Generic float to signed-int conversion +HANDLE_TARGET_OPCODE(G_FPTRUNC) + +/// Generic float to signed-int conversion +HANDLE_TARGET_OPCODE(G_FPTOSI) + +/// Generic float to unsigned-int conversion +HANDLE_TARGET_OPCODE(G_FPTOUI) + +/// Generic signed-int to float conversion +HANDLE_TARGET_OPCODE(G_SITOFP) + +/// Generic unsigned-int to float conversion +HANDLE_TARGET_OPCODE(G_UITOFP) + +/// Generic pointer offset +HANDLE_TARGET_OPCODE(G_GEP) + +/// Clear the specified number of low bits in a pointer. This rounds the value +/// *down* to the given alignment. +HANDLE_TARGET_OPCODE(G_PTR_MASK) + +/// Generic BRANCH instruction. This is an unconditional branch. +HANDLE_TARGET_OPCODE(G_BR) + +/// Generic insertelement. +HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT) + +/// Generic extractelement. +HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) + +/// Generic shufflevector. +HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) + +/// Generic byte swap. +HANDLE_TARGET_OPCODE(G_BSWAP) + +// TODO: Add more generic opcodes as we move along. + +/// Marker for the end of the generic opcode. +/// This is used to check if an opcode is in the range of the +/// generic opcodes. +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BSWAP) + +/// BUILTIN_OP_END - This must be the last enum value in this list. +/// The target-specific post-isel opcode values start here. +HANDLE_TARGET_OPCODE_MARKER(GENERIC_OP_END, PRE_ISEL_GENERIC_OPCODE_END) diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h new file mode 100644 index 0000000000000..3ca31a9709446 --- /dev/null +++ b/include/llvm/CodeGen/TargetOpcodes.h @@ -0,0 +1,42 @@ +//===-- llvm/CodeGen/TargetOpcodes.h - Target Indep Opcodes -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the target independent instruction opcodes. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETOPCODES_H +#define LLVM_CODEGEN_TARGETOPCODES_H + +namespace llvm { + +/// Invariant opcodes: All instruction sets have these as their low opcodes. +/// +namespace TargetOpcode { +enum { +#define HANDLE_TARGET_OPCODE(OPC) OPC, +#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) IDENT = OPC, +#include "llvm/CodeGen/TargetOpcodes.def" +}; +} // end namespace TargetOpcode + +/// Check whether the given Opcode is a generic opcode that is not supposed +/// to appear after ISel. +inline bool isPreISelGenericOpcode(unsigned Opcode) { + return Opcode >= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START && + Opcode <= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; +} + +/// Check whether the given Opcode is a target-specific opcode. +inline bool isTargetSpecificOpcode(unsigned Opcode) { + return Opcode > TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; +} +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index aaf0ab5d5481d..1aaa85d77a54f 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -108,6 +108,18 @@ private: bool Stopped = false; bool AddingMachinePasses = false; + /// Set the StartAfter, StartBefore and StopAfter passes to allow running only + /// a portion of the normal code-gen pass sequence. + /// + /// If the StartAfter and StartBefore pass ID is zero, then compilation will + /// begin at the normal point; otherwise, clear the Started flag to indicate + /// that passes should not be added until the starting pass is seen. If the + /// Stop pass ID is zero, then compilation will continue to the end. + /// + /// This function expects that at least one of the StartAfter or the + /// StartBefore pass IDs is null. + void setStartStopPasses(); + protected: LLVMTargetMachine *TM; PassConfigImpl *Impl = nullptr; // Internal data structures @@ -147,27 +159,25 @@ public: CodeGenOpt::Level getOptLevel() const; - /// Set the StartAfter, StartBefore and StopAfter passes to allow running only - /// a portion of the normal code-gen pass sequence. - /// - /// If the StartAfter and StartBefore pass ID is zero, then compilation will - /// begin at the normal point; otherwise, clear the Started flag to indicate - /// that passes should not be added until the starting pass is seen. If the - /// Stop pass ID is zero, then compilation will continue to the end. - /// - /// This function expects that at least one of the StartAfter or the - /// StartBefore pass IDs is null. - void setStartStopPasses(AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter) { - assert(!(StartBefore && StartAfter) && - "Start after and start before passes are given"); - assert(!(StopBefore && StopAfter) && - "Stop after and stop before passed are given"); - this->StartBefore = StartBefore; - this->StartAfter = StartAfter; - this->StopBefore = StopBefore; - this->StopAfter = StopAfter; - Started = (StartAfter == nullptr) && (StartBefore == nullptr); + /// Describe the status of the codegen + /// pipeline set by this target pass config. + /// Having a limited codegen pipeline means that options + /// have been used to restrict what codegen is doing. + /// In particular, that means that codegen won't emit + /// assembly code. 
+ bool hasLimitedCodeGenPipeline() const; + + /// If hasLimitedCodeGenPipeline is true, this method + /// returns a string with the name of the options, separated + /// by \p Separator that caused this pipeline to be limited. + std::string + getLimitedCodeGenPipelineReason(const char *Separator = "/") const; + + /// Check if the codegen pipeline is limited in such a way that it + /// won't be complete. When the codegen pipeline is not complete, + /// this means it may not be possible to generate assembly from it. + bool willCompleteCodeGenPipeline() const { + return !hasLimitedCodeGenPipeline() || (!StopAfter && !StopBefore); } void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h new file mode 100644 index 0000000000000..81907538fb0b1 --- /dev/null +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -0,0 +1,1177 @@ +//==- CodeGen/TargetRegisterInfo.h - Target Register Information -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes an abstract interface used to get information about a +// target machines register file. This information is used for a variety of +// purposed, especially register allocation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETREGISTERINFO_H +#define LLVM_CODEGEN_TARGETREGISTERINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Printable.h" +#include <cassert> +#include <cstdint> +#include <functional> + +namespace llvm { + +class BitVector; +class LiveRegMatrix; +class MachineFunction; +class MachineInstr; +class RegScavenger; +class VirtRegMap; +class LiveIntervals; + +class TargetRegisterClass { +public: + using iterator = const MCPhysReg *; + using const_iterator = const MCPhysReg *; + using sc_iterator = const TargetRegisterClass* const *; + + // Instance variables filled by tablegen, do not use! + const MCRegisterClass *MC; + const uint32_t *SubClassMask; + const uint16_t *SuperRegIndices; + const LaneBitmask LaneMask; + /// Classes with a higher priority value are assigned first by register + /// allocators using a greedy heuristic. The value is in the range [0,63]. + const uint8_t AllocationPriority; + /// Whether the class supports two (or more) disjunct subregister indices. + const bool HasDisjunctSubRegs; + /// Whether a combination of subregisters can cover every register in the + /// class. See also the CoveredBySubRegs description in Target.td. + const bool CoveredBySubRegs; + const sc_iterator SuperClasses; + ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&); + + /// Return the register class ID number. + unsigned getID() const { return MC->getID(); } + + /// begin/end - Return all of the registers in this class. 
+ /// + iterator begin() const { return MC->begin(); } + iterator end() const { return MC->end(); } + + /// Return the number of registers in this class. + unsigned getNumRegs() const { return MC->getNumRegs(); } + + iterator_range<SmallVectorImpl<MCPhysReg>::const_iterator> + getRegisters() const { + return make_range(MC->begin(), MC->end()); + } + + /// Return the specified register in the class. + unsigned getRegister(unsigned i) const { + return MC->getRegister(i); + } + + /// Return true if the specified register is included in this register class. + /// This does not include virtual registers. + bool contains(unsigned Reg) const { + return MC->contains(Reg); + } + + /// Return true if both registers are in this class. + bool contains(unsigned Reg1, unsigned Reg2) const { + return MC->contains(Reg1, Reg2); + } + + /// Return the cost of copying a value between two registers in this class. + /// A negative number means the register class is very expensive + /// to copy e.g. status flag register classes. + int getCopyCost() const { return MC->getCopyCost(); } + + /// Return true if this register class may be used to create virtual + /// registers. + bool isAllocatable() const { return MC->isAllocatable(); } + + /// Return true if the specified TargetRegisterClass + /// is a proper sub-class of this TargetRegisterClass. + bool hasSubClass(const TargetRegisterClass *RC) const { + return RC != this && hasSubClassEq(RC); + } + + /// Returns true if RC is a sub-class of or equal to this class. + bool hasSubClassEq(const TargetRegisterClass *RC) const { + unsigned ID = RC->getID(); + return (SubClassMask[ID / 32] >> (ID % 32)) & 1; + } + + /// Return true if the specified TargetRegisterClass is a + /// proper super-class of this TargetRegisterClass. + bool hasSuperClass(const TargetRegisterClass *RC) const { + return RC->hasSubClass(this); + } + + /// Returns true if RC is a super-class of or equal to this class. + bool hasSuperClassEq(const TargetRegisterClass *RC) const { + return RC->hasSubClassEq(this); + } + + /// Returns a bit vector of subclasses, including this one. + /// The vector is indexed by class IDs. + /// + /// To use it, consider the returned array as a chunk of memory that + /// contains an array of bits of size NumRegClasses. Each 32-bit chunk + /// contains a bitset of the ID of the subclasses in big-endian style. + + /// I.e., the representation of the memory from left to right at the + /// bit level looks like: + /// [31 30 ... 1 0] [ 63 62 ... 33 32] ... + /// [ XXX NumRegClasses NumRegClasses - 1 ... ] + /// Where the number represents the class ID and XXX bits that + /// should be ignored. + /// + /// See the implementation of hasSubClassEq for an example of how it + /// can be used. + const uint32_t *getSubClassMask() const { + return SubClassMask; + } + + /// Returns a 0-terminated list of sub-register indices that project some + /// super-register class into this register class. The list has an entry for + /// each Idx such that: + /// + /// There exists SuperRC where: + /// For all Reg in SuperRC: + /// this->contains(Reg:Idx) + const uint16_t *getSuperRegIndices() const { + return SuperRegIndices; + } + + /// Returns a NULL-terminated list of super-classes. The + /// classes are ordered by ID which is also a topological ordering from large + /// to small classes. The list does NOT include the current class. 
+ sc_iterator getSuperClasses() const { + return SuperClasses; + } + + /// Return true if this TargetRegisterClass is a subset + /// class of at least one other TargetRegisterClass. + bool isASubClass() const { + return SuperClasses[0] != nullptr; + } + + /// Returns the preferred order for allocating registers from this register + /// class in MF. The raw order comes directly from the .td file and may + /// include reserved registers that are not allocatable. + /// Register allocators should also make sure to allocate + /// callee-saved registers only after all the volatiles are used. The + /// RegisterClassInfo class provides filtered allocation orders with + /// callee-saved registers moved to the end. + /// + /// The MachineFunction argument can be used to tune the allocatable + /// registers based on the characteristics of the function, subtarget, or + /// other criteria. + /// + /// By default, this method returns all registers in the class. + ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const { + return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs()); + } + + /// Returns the combination of all lane masks of register in this class. + /// The lane masks of the registers are the combination of all lane masks + /// of their subregisters. Returns 1 if there are no subregisters. + LaneBitmask getLaneMask() const { + return LaneMask; + } +}; + +/// Extra information, not in MCRegisterDesc, about registers. +/// These are used by codegen, not by MC. +struct TargetRegisterInfoDesc { + unsigned CostPerUse; // Extra cost of instructions using register. + bool inAllocatableClass; // Register belongs to an allocatable regclass. +}; + +/// Each TargetRegisterClass has a per register weight, and weight +/// limit which must be less than the limits of its pressure sets. +struct RegClassWeight { + unsigned RegWeight; + unsigned WeightLimit; +}; + +/// TargetRegisterInfo base class - We assume that the target defines a static +/// array of TargetRegisterDesc objects that represent all of the machine +/// registers that the target has. As such, we simply have to track a pointer +/// to this array so that we can turn register number into a register +/// descriptor. +/// +class TargetRegisterInfo : public MCRegisterInfo { +public: + using regclass_iterator = const TargetRegisterClass * const *; + using vt_iterator = const MVT::SimpleValueType *; + struct RegClassInfo { + unsigned RegSize, SpillSize, SpillAlignment; + vt_iterator VTList; + }; +private: + const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen + const char *const *SubRegIndexNames; // Names of subreg indexes. + // Pointer to array of lane masks, one per sub-reg index. + const LaneBitmask *SubRegIndexLaneMasks; + + regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses + LaneBitmask CoveringLanes; + const RegClassInfo *const RCInfos; + unsigned HwMode; + +protected: + TargetRegisterInfo(const TargetRegisterInfoDesc *ID, + regclass_iterator RegClassBegin, + regclass_iterator RegClassEnd, + const char *const *SRINames, + const LaneBitmask *SRILaneMasks, + LaneBitmask CoveringLanes, + const RegClassInfo *const RSI, + unsigned Mode = 0); + virtual ~TargetRegisterInfo(); + +public: + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. 
(Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo<unsigned> uses -1u and -2u. + + /// isStackSlot - Sometimes it is useful the be able to store a non-negative + /// frame index in a variable that normally holds a register. isStackSlot() + /// returns true if Reg is in the range used for stack slots. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contains a stack slot, always check + /// isStackSlot() first. + /// + static bool isStackSlot(unsigned Reg) { + return int(Reg) >= (1 << 30); + } + + /// Compute the frame index from a register value representing a stack slot. + static int stackSlot2Index(unsigned Reg) { + assert(isStackSlot(Reg) && "Not a stack slot"); + return int(Reg - (1u << 30)); + } + + /// Convert a non-negative frame index to a stack slot register value. + static unsigned index2StackSlot(int FI) { + assert(FI >= 0 && "Cannot hold a negative frame index."); + return FI + (1u << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) > 0; + } + + /// Return true if the specified register number is in + /// the virtual register namespace. + static bool isVirtualRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) < 0; + } + + /// Convert a virtual register number to a 0-based index. + /// The first virtual register in a function will get the index 0. + static unsigned virtReg2Index(unsigned Reg) { + assert(isVirtualRegister(Reg) && "Not a virtual register"); + return Reg & ~(1u << 31); + } + + /// Convert a 0-based index to a virtual register number. + /// This is the inverse operation of VirtReg2IndexFunctor below. + static unsigned index2VirtReg(unsigned Index) { + return Index | (1u << 31); + } + + /// Return the size in bits of a register from class RC. + unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).RegSize; + } + + /// Return the size in bytes of the stack slot allocated to hold a spilled + /// copy of a register from class RC. + unsigned getSpillSize(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).SpillSize / 8; + } + + /// Return the minimum required alignment in bytes for a spill slot for + /// a register of this class. + unsigned getSpillAlignment(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).SpillAlignment / 8; + } + + /// Return true if the given TargetRegisterClass has the ValueType T. + bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const { + for (auto I = legalclasstypes_begin(RC); *I != MVT::Other; ++I) + if (MVT(*I) == T) + return true; + return false; + } + + /// Loop over all of the value types that can be represented by values + /// in the given register class. 
+ vt_iterator legalclasstypes_begin(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).VTList; + } + + vt_iterator legalclasstypes_end(const TargetRegisterClass &RC) const { + vt_iterator I = legalclasstypes_begin(RC); + while (*I != MVT::Other) + ++I; + return I; + } + + /// Returns the Register Class of a physical register of the given type, + /// picking the most sub register class of the right type that contains this + /// physreg. + const TargetRegisterClass * + getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + /// Return the maximal subclass of the given register class that is + /// allocatable or NULL. + const TargetRegisterClass * + getAllocatableClass(const TargetRegisterClass *RC) const; + + /// Returns a bitset indexed by register number indicating if a register is + /// allocatable or not. If a register class is specified, returns the subset + /// for the class. + BitVector getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC = nullptr) const; + + /// Return the additional cost of using this register instead + /// of other registers in its class. + unsigned getCostPerUse(unsigned RegNo) const { + return InfoDesc[RegNo].CostPerUse; + } + + /// Return true if the register is in the allocation of any register class. + bool isInAllocatableClass(unsigned RegNo) const { + return InfoDesc[RegNo].inAllocatableClass; + } + + /// Return the human-readable symbolic target-specific + /// name for the specified SubRegIndex. + const char *getSubRegIndexName(unsigned SubIdx) const { + assert(SubIdx && SubIdx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIndexNames[SubIdx-1]; + } + + /// Return a bitmask representing the parts of a register that are covered by + /// SubIdx \see LaneBitmask. + /// + /// SubIdx == 0 is allowed, it has the lane mask ~0u. + LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const { + assert(SubIdx < getNumSubRegIndices() && "This is not a subregister index"); + return SubRegIndexLaneMasks[SubIdx]; + } + + /// The lane masks returned by getSubRegIndexLaneMask() above can only be + /// used to determine if sub-registers overlap - they can't be used to + /// determine if a set of sub-registers completely cover another + /// sub-register. + /// + /// The X86 general purpose registers have two lanes corresponding to the + /// sub_8bit and sub_8bit_hi sub-registers. Both sub_32bit and sub_16bit have + /// lane masks '3', but the sub_16bit sub-register doesn't fully cover the + /// sub_32bit sub-register. + /// + /// On the other hand, the ARM NEON lanes fully cover their registers: The + /// dsub_0 sub-register is completely covered by the ssub_0 and ssub_1 lanes. + /// This is related to the CoveredBySubRegs property on register definitions. + /// + /// This function returns a bit mask of lanes that completely cover their + /// sub-registers. More precisely, given: + /// + /// Covering = getCoveringLanes(); + /// MaskA = getSubRegIndexLaneMask(SubA); + /// MaskB = getSubRegIndexLaneMask(SubB); + /// + /// If (MaskA & ~(MaskB & Covering)) == 0, then SubA is completely covered by + /// SubB. + LaneBitmask getCoveringLanes() const { return CoveringLanes; } + + /// Returns true if the two registers are equal or alias each other. + /// The registers may be virtual registers. + bool regsOverlap(unsigned regA, unsigned regB) const { + if (regA == regB) return true; + if (isVirtualRegister(regA) || isVirtualRegister(regB)) + return false; + + // Regunits are numerically ordered. 
Find a common unit. + MCRegUnitIterator RUA(regA, this); + MCRegUnitIterator RUB(regB, this); + do { + if (*RUA == *RUB) return true; + if (*RUA < *RUB) ++RUA; + else ++RUB; + } while (RUA.isValid() && RUB.isValid()); + return false; + } + + /// Returns true if Reg contains RegUnit. + bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { + for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) + if (*Units == RegUnit) + return true; + return false; + } + + /// Return a null-terminated list of all of the callee-saved registers on + /// this target. The register should be in the order of desired callee-save + /// stack frame offset. The first register is closest to the incoming stack + /// pointer if stack grows down, and vice versa. + /// Notice: This function does not take into account disabled CSRs. + /// In most cases you will want to use instead the function + /// getCalleeSavedRegs that is implemented in MachineRegisterInfo. + virtual const MCPhysReg* + getCalleeSavedRegs(const MachineFunction *MF) const = 0; + + /// Return a mask of call-preserved registers for the given calling convention + /// on the current function. The mask should include all call-preserved + /// aliases. This is used by the register allocator to determine which + /// registers can be live across a call. + /// + /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries. + /// A set bit indicates that all bits of the corresponding register are + /// preserved across the function call. The bit mask is expected to be + /// sub-register complete, i.e. if A is preserved, so are all its + /// sub-registers. + /// + /// Bits are numbered from the LSB, so the bit for physical register Reg can + /// be found as (Mask[Reg / 32] >> Reg % 32) & 1. + /// + /// A NULL pointer means that no register mask will be used, and call + /// instructions should use implicit-def operands to indicate call clobbered + /// registers. + /// + virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const { + // The default mask clobbers everything. All targets should override. + return nullptr; + } + + /// Return a register mask that clobbers everything. + virtual const uint32_t *getNoPreservedMask() const { + llvm_unreachable("target does not provide no preserved mask"); + } + + /// Return true if all bits that are set in mask \p mask0 are also set in + /// \p mask1. + bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; + + /// Return all the call-preserved register masks defined for this target. + virtual ArrayRef<const uint32_t *> getRegMasks() const = 0; + virtual ArrayRef<const char *> getRegMaskNames() const = 0; + + /// Returns a bitset indexed by physical register number indicating if a + /// register is a special register that has particular uses and should be + /// considered unavailable at all times, e.g. stack pointer, return address. + /// A reserved register: + /// - is not allocatable + /// - is considered always live + /// - is ignored by liveness tracking + /// It is often necessary to reserve the super registers of a reserved + /// register as well, to avoid them getting allocated indirectly. You may use + /// markSuperRegs() and checkAllSuperRegsMarked() in this case. + virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + + /// Returns true if PhysReg is unallocatable and constant throughout the + /// function. Used by MachineRegisterInfo::isConstantPhysReg(). 
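(Editorial aside, not part of the patch: the regmask bit numbering spelled out above is easy to get wrong, so here is a minimal sketch of a query built only from the declarations in this hunk. The helper name is hypothetical.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  using namespace llvm;

  // True if every bit of PhysReg is preserved across a call with convention CC.
  static bool isCallPreserved(const TargetRegisterInfo &TRI,
                              const MachineFunction &MF, CallingConv::ID CC,
                              unsigned PhysReg) {
    const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
    if (!Mask)
      return false; // No regmask: clobbers are modeled with implicit-def operands.
    return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1;
  }

The constant-physreg hook documented just above follows next.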
+ virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; } + + /// Physical registers that may be modified within a function but are + /// guaranteed to be restored before any uses. This is useful for targets that + /// have call sequences where a GOT register may be updated by the caller + /// prior to a call and is guaranteed to be restored (also by the caller) + /// after the call. + virtual bool isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + return false; + } + + /// Prior to adding the live-out mask to a stackmap or patchpoint + /// instruction, provide the target the opportunity to adjust it (mainly to + /// remove pseudo-registers that should be ignored). + virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const {} + + /// Return a super-register of the specified register + /// Reg so its sub-register of index SubIdx is Reg. + unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterClass *RC) const { + return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC); + } + + /// Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + /// + /// TableGen will synthesize missing A sub-classes. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + + // For a copy-like instruction that defines a register of class DefRC with + // subreg index DefSubReg, reading from another source with class SrcRC and + // subregister SrcSubReg return true if this is a preferable copy + // instruction or an earlier use should be used. + virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const; + + /// Returns the largest legal sub-class of RC that + /// supports the sub-register index Idx. + /// If no such sub-class exists, return NULL. + /// If all registers in RC already have an Idx sub-register, return RC. + /// + /// TableGen generates a version of this function that is good enough in most + /// cases. Targets can override if they have constraints that TableGen + /// doesn't understand. For example, the x86 sub_8bit sub-register index is + /// supported by the full GR32 register class in 64-bit mode, but only by the + /// GR32_ABCD regiister class in 32-bit mode. + /// + /// TableGen will synthesize missing RC sub-classes. + virtual const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const { + assert(Idx == 0 && "Target has no sub-registers"); + return RC; + } + + /// Return the subregister index you get from composing + /// two subregister indices. + /// + /// The special null sub-register index composes as the identity. + /// + /// If R:a:b is the same register as R:c, then composeSubRegIndices(a, b) + /// returns c. Note that composeSubRegIndices does not tell you about illegal + /// compositions. If R does not have a subreg a, or R:a does not have a subreg + /// b, composeSubRegIndices doesn't tell you. + /// + /// The ARM register Q0 has two D subregs dsub_0:D0 and dsub_1:D1. It also has + /// ssub_0:S0 - ssub_3:S3 subregs. + /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2. 
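(Editorial aside, not part of the patch: to make the ARM example just above concrete before the declaration that follows, a sketch using composeSubRegIndices(). The ARM::dsub_1/ssub_0/ssub_2 enumerators are the ARM backend's TableGen-generated sub-register indices and are assumed here purely for illustration.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include <cassert>
  using namespace llvm;

  unsigned composeQ0Indices(const TargetRegisterInfo &TRI) {
    // Q0:dsub_1 is D1, and D1:ssub_0 is S2, i.e. Q0:ssub_2.
    unsigned Composed = TRI.composeSubRegIndices(ARM::dsub_1, ARM::ssub_0);
    assert(Composed == ARM::ssub_2 && "dsub_1 + ssub_0 should compose to ssub_2");
    return Composed;
  }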
+ unsigned composeSubRegIndices(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return composeSubRegIndicesImpl(a, b); + } + + /// Transforms a LaneMask computed for one subregister to the lanemask that + /// would have been computed when composing the subsubregisters with IdxA + /// first. @sa composeSubRegIndices() + LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA, + LaneBitmask Mask) const { + if (!IdxA) + return Mask; + return composeSubRegIndexLaneMaskImpl(IdxA, Mask); + } + + /// Transform a lanemask given for a virtual register to the corresponding + /// lanemask before using subregister with index \p IdxA. + /// This is the reverse of composeSubRegIndexLaneMask(), assuming Mask is a + /// valie lane mask (no invalid bits set) the following holds: + /// X0 = composeSubRegIndexLaneMask(Idx, Mask) + /// X1 = reverseComposeSubRegIndexLaneMask(Idx, X0) + /// => X1 == Mask + LaneBitmask reverseComposeSubRegIndexLaneMask(unsigned IdxA, + LaneBitmask LaneMask) const { + if (!IdxA) + return LaneMask; + return reverseComposeSubRegIndexLaneMaskImpl(IdxA, LaneMask); + } + + /// Debugging helper: dump register in human readable form to dbgs() stream. + static void dumpReg(unsigned Reg, unsigned SubRegIndex = 0, + const TargetRegisterInfo* TRI = nullptr); + +protected: + /// Overridden by TableGen in targets that have sub-registers. + virtual unsigned composeSubRegIndicesImpl(unsigned, unsigned) const { + llvm_unreachable("Target has no sub-registers"); + } + + /// Overridden by TableGen in targets that have sub-registers. + virtual LaneBitmask + composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const { + llvm_unreachable("Target has no sub-registers"); + } + + virtual LaneBitmask reverseComposeSubRegIndexLaneMaskImpl(unsigned, + LaneBitmask) const { + llvm_unreachable("Target has no sub-registers"); + } + +public: + /// Find a common super-register class if it exists. + /// + /// Find a register class, SuperRC and two sub-register indices, PreA and + /// PreB, such that: + /// + /// 1. PreA + SubA == PreB + SubB (using composeSubRegIndices()), and + /// + /// 2. For all Reg in SuperRC: Reg:PreA in RCA and Reg:PreB in RCB, and + /// + /// 3. SuperRC->getSize() >= max(RCA->getSize(), RCB->getSize()). + /// + /// SuperRC will be chosen such that no super-class of SuperRC satisfies the + /// requirements, and there is no register class with a smaller spill size + /// that satisfies the requirements. + /// + /// SubA and SubB must not be 0. Use getMatchingSuperRegClass() instead. + /// + /// Either of the PreA and PreB sub-register indices may be returned as 0. In + /// that case, the returned register class will be a sub-class of the + /// corresponding argument register class. + /// + /// The function returns NULL if no register class can be found. 
+ const TargetRegisterClass* + getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const; + + //===--------------------------------------------------------------------===// + // Register Class Information + // +protected: + const RegClassInfo &getRegClassInfo(const TargetRegisterClass &RC) const { + return RCInfos[getNumRegClasses() * HwMode + RC.getID()]; + } + +public: + /// Register class iterators + regclass_iterator regclass_begin() const { return RegClassBegin; } + regclass_iterator regclass_end() const { return RegClassEnd; } + iterator_range<regclass_iterator> regclasses() const { + return make_range(regclass_begin(), regclass_end()); + } + + unsigned getNumRegClasses() const { + return (unsigned)(regclass_end()-regclass_begin()); + } + + /// Returns the register class associated with the enumeration value. + /// See class MCOperandInfo. + const TargetRegisterClass *getRegClass(unsigned i) const { + assert(i < getNumRegClasses() && "Register Class ID out of range"); + return RegClassBegin[i]; + } + + /// Returns the name of the register class. + const char *getRegClassName(const TargetRegisterClass *Class) const { + return MCRegisterInfo::getRegClassName(Class->MC); + } + + /// Find the largest common subclass of A and B. + /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. + const TargetRegisterClass * + getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; + + /// Returns a TargetRegisterClass used for pointer values. + /// If a target supports multiple different pointer register classes, + /// kind specifies which one is indicated. + virtual const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { + llvm_unreachable("Target didn't implement getPointerRegClass!"); + } + + /// Returns a legal register class to copy a register in the specified class + /// to or from. If it is possible to copy the register directly without using + /// a cross register class copy, return the specified RC. Returns NULL if it + /// is not possible to copy between two registers of the specified class. + virtual const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const { + return RC; + } + + /// Returns the largest super class of RC that is legal to use in the current + /// sub-target and has the same spill size. + /// The returned register class can be used to create virtual registers which + /// means that all its registers can be copied and spilled. + virtual const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &) const { + /// The default implementation is very conservative and doesn't allow the + /// register allocator to inflate register classes. + return RC; + } + + /// Return the register pressure "high water mark" for the specific register + /// class. The scheduler is in high register pressure mode (for the specific + /// register class) if it goes over the limit. + /// + /// Note: this is the old register pressure model that relies on a manually + /// specified representative register class per value type. 
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return 0; + } + + /// Return a heuristic for the machine scheduler to compare the profitability + /// of increasing one register pressure set versus another. The scheduler + /// will prefer increasing the register pressure of the set which returns + /// the largest value for this function. + virtual unsigned getRegPressureSetScore(const MachineFunction &MF, + unsigned PSetID) const { + return PSetID; + } + + /// Get the weight in units of pressure for this register class. + virtual const RegClassWeight &getRegClassWeight( + const TargetRegisterClass *RC) const = 0; + + /// Get the weight in units of pressure for this register unit. + virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; + + /// Get the number of dimensions of register pressure. + virtual unsigned getNumRegPressureSets() const = 0; + + /// Get the name of this register unit pressure set. + virtual const char *getRegPressureSetName(unsigned Idx) const = 0; + + /// Get the register unit pressure limit for this dimension. + /// This limit must be adjusted dynamically for reserved registers. + virtual unsigned getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const = 0; + + /// Get the dimensions of register pressure impacted by this register class. + /// Returns a -1 terminated array of pressure set IDs. + virtual const int *getRegClassPressureSets( + const TargetRegisterClass *RC) const = 0; + + /// Get the dimensions of register pressure impacted by this register unit. + /// Returns a -1 terminated array of pressure set IDs. + virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0; + + /// Get a list of 'hint' registers that the register allocator should try + /// first when allocating a physical register for the virtual register + /// VirtReg. These registers are effectively moved to the front of the + /// allocation order. If true is returned, regalloc will try to only use + /// hints to the greatest extent possible even if it means spilling. + /// + /// The Order argument is the allocation order for VirtReg's register class + /// as returned from RegisterClassInfo::getOrder(). The hint registers must + /// come from Order, and they must not be reserved. + /// + /// The default implementation of this function will only add target + /// independent register allocation hints. Targets that override this + /// function should typically call this default implementation as well and + /// expect to see generic copy hints added. + virtual bool getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM = nullptr, + const LiveRegMatrix *Matrix = nullptr) + const; + + /// A callback to allow target a chance to update register allocation hints + /// when a register is "changed" (e.g. coalesced) to another register. + /// e.g. On ARM, some virtual registers should target register pairs, + /// if one of pair is coalesced to another register, the allocation hint of + /// the other half of the pair should be changed to point to the new register. + virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + // Do nothing. + } + + /// The creation of multiple copy hints have been implemented in + /// weightCalcHelper(), but since this affects so many tests for many + /// targets, this is temporarily disabled per default. 
THIS SHOULD BE
+ /// "GENERAL GOODNESS" and hopefully all targets will update their tests
+ /// and enable this soon. This hook should then be removed.
+ virtual bool enableMultipleCopyHints() const { return false; }
+
+ /// Allow the target to reverse allocation order of local live ranges. This
+ /// will generally allocate shorter local live ranges first. For targets with
+ /// many registers, this could reduce regalloc compile time by a large
+ /// factor. It is disabled by default for three reasons:
+ /// (1) Top-down allocation is simpler and easier to debug for targets that
+ /// don't benefit from reversing the order.
+ /// (2) Bottom-up allocation could result in poor eviction decisions on some
+ /// targets affecting the performance of compiled code.
+ /// (3) Bottom-up allocation is no longer guaranteed to optimally color.
+ virtual bool reverseLocalAssignment() const { return false; }
+
+ /// Allow the target to override the cost of using a callee-saved register for
+ /// the first time. Default value of 0 means we will use a callee-saved
+ /// register if it is available.
+ virtual unsigned getCSRFirstUseCost() const { return 0; }
+
+ /// Returns true if the target requires (and can make use of) the register
+ /// scavenger.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target wants to use frame pointer based accesses to
+ /// spill to the scavenger emergency spill slot.
+ virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
+ return true;
+ }
+
+ /// Returns true if the target requires post PEI scavenging of registers for
+ /// materializing frame index constants.
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target requires using the RegScavenger directly for
+ /// frame elimination despite using requiresFrameIndexScavenging.
+ virtual bool requiresFrameIndexReplacementScavenging(
+ const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target wants the LocalStackAllocation pass to be run
+ /// and virtual base registers used for more efficient stack access.
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Return true if the target has reserved a spill slot in the stack frame of
+ /// the given function for the specified register. e.g. On x86, if the frame
+ /// register is required, the first fixed stack object is reserved as its
+ /// spill slot. This tells PEI not to create a new stack frame
+ /// object for the given register. It should be called only after
+ /// determineCalleeSaves().
+ virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const {
+ return false;
+ }
+
+ /// Returns true if the live-ins should be tracked after register allocation.
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// True if the stack can be realigned for the target.
+ virtual bool canRealignStack(const MachineFunction &MF) const;
+
+ /// True if storage within the function requires the stack pointer to be
+ /// aligned more than the normal calling convention calls for.
+ /// This cannot be overridden by the target, but canRealignStack can be
+ /// overridden.
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ /// Get the offset from the referenced frame index in the instruction,
+ /// if there is one.
+ virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI, + int Idx) const { + return 0; + } + + /// Returns true if the instruction's frame index reference would be better + /// served by a base register other than FP or SP. + /// Used by LocalStackFrameAllocation to determine which frame index + /// references it should create new base registers for. + virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + return false; + } + + /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx + /// before insertion point I. + virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + llvm_unreachable("materializeFrameBaseRegister does not exist on this " + "target"); + } + + /// Resolve a frame index operand of an instruction + /// to reference the indicated base register plus offset instead. + virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { + llvm_unreachable("resolveFrameIndex does not exist on this target"); + } + + /// Determine whether a given base register plus offset immediate is + /// encodable to resolve a frame index. + virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, + int64_t Offset) const { + llvm_unreachable("isFrameOffsetLegal does not exist on this target"); + } + + /// Spill the register so it can be used by the register scavenger. + /// Return true if the register was spilled, false otherwise. + /// If this function does not spill the register, the scavenger + /// will instead spill it to the emergency spill slot. + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + return false; + } + + /// This method must be overriden to eliminate abstract frame indices from + /// instructions which may use them. The instruction referenced by the + /// iterator contains an MO_FrameIndex operand which must be eliminated by + /// this method. This method may modify or replace the specified instruction, + /// as long as it keeps the iterator pointing at the finished product. + /// SPAdj is the SP adjustment due to call frame setup instruction. + /// FIOperandNum is the FI operand number. + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = nullptr) const = 0; + + /// Return the assembly name for \p Reg. + virtual StringRef getRegAsmName(unsigned Reg) const { + // FIXME: We are assuming that the assembly name is equal to the TableGen + // name converted to lower case + // + // The TableGen name is the name of the definition for this register in the + // target's tablegen files. For example, the TableGen name of + // def EAX : Register <...>; is "EAX" + return StringRef(getName(Reg)); + } + + //===--------------------------------------------------------------------===// + /// Subtarget Hooks + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + virtual bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const + { return true; } + + //===--------------------------------------------------------------------===// + /// Debug information queries. 
+
+ /// getFrameRegister - This method should return the register used as a base
+ /// for values allocated in the current stack frame.
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+
+ /// Mark a register and all its aliases as reserved in the given set.
+ void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
+
+ /// Returns true if for every register in the set all super registers are part
+ /// of the set as well.
+ bool checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions = ArrayRef<MCPhysReg>()) const;
+};
+
+//===----------------------------------------------------------------------===//
+// SuperRegClassIterator
+//===----------------------------------------------------------------------===//
+//
+// Iterate over the possible super-registers for a given register class. The
+// iterator will visit a list of pairs (Idx, Mask) corresponding to the
+// possible classes of super-registers.
+//
+// Each bit mask will have at least one set bit, and each set bit in Mask
+// corresponds to a SuperRC such that:
+//
+// For all Reg in SuperRC: Reg:Idx is in RC.
+//
+// The iterator can include (0, RC->getSubClassMask()) as the first entry which
+// also satisfies the above requirement, assuming Reg:0 == Reg.
+//
+class SuperRegClassIterator {
+ const unsigned RCMaskWords;
+ unsigned SubReg = 0;
+ const uint16_t *Idx;
+ const uint32_t *Mask;
+
+public:
+ /// Create a SuperRegClassIterator that visits all the super-register classes
+ /// of RC. When IncludeSelf is set, also include the (0, sub-classes) entry.
+ SuperRegClassIterator(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ bool IncludeSelf = false)
+ : RCMaskWords((TRI->getNumRegClasses() + 31) / 32),
+ Idx(RC->getSuperRegIndices()), Mask(RC->getSubClassMask()) {
+ if (!IncludeSelf)
+ ++*this;
+ }
+
+ /// Returns true if this iterator is still pointing at a valid entry.
+ bool isValid() const { return Idx; }
+
+ /// Returns the current sub-register index.
+ unsigned getSubReg() const { return SubReg; }
+
+ /// Returns the bit mask of register classes that getSubReg() projects into
+ /// RC.
+ /// See TargetRegisterClass::getSubClassMask() for how to use it.
+ const uint32_t *getMask() const { return Mask; }
+
+ /// Advance iterator to the next entry.
+ void operator++() {
+ assert(isValid() && "Cannot move iterator past end.");
+ Mask += RCMaskWords;
+ SubReg = *Idx++;
+ if (!SubReg)
+ Idx = nullptr;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// BitMaskClassIterator
+//===----------------------------------------------------------------------===//
+/// This class encapsulates the logic to iterate over the bitmasks returned by
+/// the various RegClass related APIs.
+/// E.g., this class can be used to iterate over the subclasses provided by
+/// TargetRegisterClass::getSubClassMask or SuperRegClassIterator::getMask.
+class BitMaskClassIterator {
+ /// Total number of register classes.
+ const unsigned NumRegClasses;
+ /// Base index of CurrentChunk.
+ /// In other words, the number of bits we read to get to the
+ /// beginning of that chunk.
+ unsigned Base = 0;
+ /// Adjusted base index of CurrentChunk.
+ /// Base index + how many bits we read within CurrentChunk.
+ unsigned Idx = 0;
+ /// Current register class ID.
+ unsigned ID = 0;
+ /// Mask we are iterating over.
+ const uint32_t *Mask;
+ /// Current chunk of the Mask we are traversing.
+ uint32_t CurrentChunk;
+
+ /// Move ID to the next set bit.
+ void moveToNextID() { + // If the current chunk of memory is empty, move to the next one, + // while making sure we do not go pass the number of register + // classes. + while (!CurrentChunk) { + // Move to the next chunk. + Base += 32; + if (Base >= NumRegClasses) { + ID = NumRegClasses; + return; + } + CurrentChunk = *++Mask; + Idx = Base; + } + // Otherwise look for the first bit set from the right + // (representation of the class ID is big endian). + // See getSubClassMask for more details on the representation. + unsigned Offset = countTrailingZeros(CurrentChunk); + // Add the Offset to the adjusted base number of this chunk: Idx. + // This is the ID of the register class. + ID = Idx + Offset; + + // Consume the zeros, if any, and the bit we just read + // so that we are at the right spot for the next call. + // Do not do Offset + 1 because Offset may be 31 and 32 + // will be UB for the shift, though in that case we could + // have make the chunk being equal to 0, but that would + // have introduced a if statement. + moveNBits(Offset); + moveNBits(1); + } + + /// Move \p NumBits Bits forward in CurrentChunk. + void moveNBits(unsigned NumBits) { + assert(NumBits < 32 && "Undefined behavior spotted!"); + // Consume the bit we read for the next call. + CurrentChunk >>= NumBits; + // Adjust the base for the chunk. + Idx += NumBits; + } + +public: + /// Create a BitMaskClassIterator that visits all the register classes + /// represented by \p Mask. + /// + /// \pre \p Mask != nullptr + BitMaskClassIterator(const uint32_t *Mask, const TargetRegisterInfo &TRI) + : NumRegClasses(TRI.getNumRegClasses()), Mask(Mask), CurrentChunk(*Mask) { + // Move to the first ID. + moveToNextID(); + } + + /// Returns true if this iterator is still pointing at a valid entry. + bool isValid() const { return getID() != NumRegClasses; } + + /// Returns the current register class ID. + unsigned getID() const { return ID; } + + /// Advance iterator to the next entry. + void operator++() { + assert(isValid() && "Cannot move iterator past end."); + moveToNextID(); + } +}; + +// This is useful when building IndexedMaps keyed on virtual registers +struct VirtReg2IndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned Reg) const { + return TargetRegisterInfo::virtReg2Index(Reg); + } +}; + +/// Prints virtual and physical registers with or without a TRI instance. +/// +/// The format is: +/// %noreg - NoRegister +/// %5 - a virtual register. +/// %5:sub_8bit - a virtual register with sub-register index (with TRI). +/// %eax - a physical register +/// %physreg17 - a physical register when no TRI instance given. +/// +/// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; +Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, + unsigned SubRegIdx = 0); + +/// Create Printable object to print register units on a \ref raw_ostream. +/// +/// Register units are named after their root registers: +/// +/// al - Single root. +/// fp0~st7 - Dual roots. +/// +/// Usage: OS << printRegUnit(Unit, TRI) << '\n'; +Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); + +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); + +/// \brief Create Printable object to print register classes or register banks +/// on a \ref raw_ostream. 
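(Editorial aside, not part of the patch: a short usage sketch for BitMaskClassIterator, combining it with the getSubClassMask(), getRegClass() and getRegClassName() APIs declared earlier in this header. The final printing helper declared below follows it.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print the name of every register class contained in RC's sub-class mask.
  void listSubClasses(const TargetRegisterClass *RC,
                      const TargetRegisterInfo &TRI, raw_ostream &OS) {
    for (BitMaskClassIterator It(RC->getSubClassMask(), TRI); It.isValid(); ++It)
      OS << TRI.getRegClassName(TRI.getRegClass(It.getID())) << '\n';
  }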
+Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETREGISTERINFO_H diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index f236679764688..1044f0bd27e6d 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -18,9 +18,9 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" -#include "llvm/Target/TargetSubtargetInfo.h" namespace llvm { @@ -116,7 +116,7 @@ public: return SchedModel.getProcResource(PIdx); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const char *getResourceName(unsigned PIdx) const { if (!PIdx) return "MOps"; diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h new file mode 100644 index 0000000000000..576522aef466e --- /dev/null +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -0,0 +1,255 @@ +//===- llvm/CodeGen/TargetSubtargetInfo.h - Target Information --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the subtarget options of a Target machine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H +#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/PBQPRAConstraint.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CodeGen.h" +#include <memory> +#include <vector> + + +namespace llvm { + +class CallLowering; +class InstrItineraryData; +struct InstrStage; +class InstructionSelector; +class LegalizerInfo; +class MachineInstr; +struct MachineSchedPolicy; +struct MCReadAdvanceEntry; +struct MCWriteLatencyEntry; +struct MCWriteProcResEntry; +class RegisterBankInfo; +class SDep; +class SelectionDAGTargetInfo; +struct SubtargetFeatureKV; +struct SubtargetInfoKV; +class SUnit; +class TargetFrameLowering; +class TargetInstrInfo; +class TargetLowering; +class TargetRegisterClass; +class TargetRegisterInfo; +class TargetSchedModel; +class Triple; + +//===----------------------------------------------------------------------===// +/// +/// TargetSubtargetInfo - Generic base class for all target subtargets. All +/// Target-specific options that control code generation and printing should +/// be exposed through a TargetSubtargetInfo-derived class. +/// +class TargetSubtargetInfo : public MCSubtargetInfo { +protected: // Can only create subclasses... + TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, + ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, + const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, + const unsigned *OC, const unsigned *FP); + +public: + // AntiDepBreakMode - Type of anti-dependence breaking that should + // be performed before post-RA scheduling. 
+ using AntiDepBreakMode = enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL }; + using RegClassVector = SmallVectorImpl<const TargetRegisterClass *>; + + TargetSubtargetInfo() = delete; + TargetSubtargetInfo(const TargetSubtargetInfo &) = delete; + TargetSubtargetInfo &operator=(const TargetSubtargetInfo &) = delete; + ~TargetSubtargetInfo() override; + + virtual bool isXRaySupported() const { return false; } + + // Interfaces to the major aspects of target machine information: + // + // -- Instruction opcode and operand information + // -- Pipelines and scheduling information + // -- Stack frame information + // -- Selection DAG lowering information + // -- Call lowering information + // + // N.B. These objects may change during compilation. It's not safe to cache + // them between functions. + virtual const TargetInstrInfo *getInstrInfo() const { return nullptr; } + virtual const TargetFrameLowering *getFrameLowering() const { + return nullptr; + } + virtual const TargetLowering *getTargetLowering() const { return nullptr; } + virtual const SelectionDAGTargetInfo *getSelectionDAGInfo() const { + return nullptr; + } + virtual const CallLowering *getCallLowering() const { return nullptr; } + + // FIXME: This lets targets specialize the selector by subtarget (which lets + // us do things like a dedicated avx512 selector). However, we might want + // to also specialize selectors by MachineFunction, which would let us be + // aware of optsize/optnone and such. + virtual const InstructionSelector *getInstructionSelector() const { + return nullptr; + } + + virtual unsigned getHwMode() const { return 0; } + + /// Target can subclass this hook to select a different DAG scheduler. + virtual RegisterScheduler::FunctionPassCtor + getDAGScheduler(CodeGenOpt::Level) const { + return nullptr; + } + + virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; } + + /// getRegisterInfo - If register information is available, return it. If + /// not, return null. + virtual const TargetRegisterInfo *getRegisterInfo() const { return nullptr; } + + /// If the information for the register banks is available, return it. + /// Otherwise return nullptr. + virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr; } + + /// getInstrItineraryData - Returns instruction itinerary data for the target + /// or specific subtarget. + virtual const InstrItineraryData *getInstrItineraryData() const { + return nullptr; + } + + /// Resolve a SchedClass at runtime, where SchedClass identifies an + /// MCSchedClassDesc with the isVariant property. This may return the ID of + /// another variant SchedClass, but repeated invocation must quickly terminate + /// in a nonvariant SchedClass. + virtual unsigned resolveSchedClass(unsigned SchedClass, + const MachineInstr *MI, + const TargetSchedModel *SchedModel) const { + return 0; + } + + /// \brief True if the subtarget should run MachineScheduler after aggressive + /// coalescing. + /// + /// This currently replaces the SelectionDAG scheduler with the "source" order + /// scheduler (though see below for an option to turn this off and use the + /// TargetLowering preference). It does not yet disable the postRA scheduler. + virtual bool enableMachineScheduler() const; + + /// \brief Support printing of [latency:throughput] comment in output .S file. + virtual bool supportPrintSchedInfo() const { return false; } + + /// \brief True if the machine scheduler should disable the TLI preference + /// for preRA scheduling with the source level scheduler. 
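(Editorial aside, not part of the patch: a sketch of the usual way code generation passes reach these interfaces. MachineFunction::getSubtarget() is assumed from MachineFunction.h, and the null checks in the comments reflect the default implementations above.)

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/TargetSubtargetInfo.h"
  using namespace llvm;

  void querySubtarget(const MachineFunction &MF) {
    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const TargetInstrInfo *TII = STI.getInstrInfo();       // may be null (default above)
    const TargetRegisterInfo *TRI = STI.getRegisterInfo(); // may be null (default above)
    bool UseMISched = STI.enableMachineScheduler();
    (void)TII; (void)TRI; (void)UseMISched;
  }

The remaining scheduling hooks continue below.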
+ virtual bool enableMachineSchedDefaultSched() const { return true; } + + /// \brief True if the subtarget should enable joining global copies. + /// + /// By default this is enabled if the machine scheduler is enabled, but + /// can be overridden. + virtual bool enableJoinGlobalCopies() const; + + /// True if the subtarget should run a scheduler after register allocation. + /// + /// By default this queries the PostRAScheduling bit in the scheduling model + /// which is the preferred way to influence this. + virtual bool enablePostRAScheduler() const; + + /// \brief True if the subtarget should run the atomic expansion pass. + virtual bool enableAtomicExpand() const; + + /// \brief Override generic scheduling policy within a region. + /// + /// This is a convenient way for targets that don't provide any custom + /// scheduling heuristics (no custom MachineSchedStrategy) to make + /// changes to the generic scheduling policy. + virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const {} + + // \brief Perform target specific adjustments to the latency of a schedule + // dependency. + virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {} + + // For use with PostRAScheduling: get the anti-dependence breaking that should + // be performed before post-RA scheduling. + virtual AntiDepBreakMode getAntiDepBreakMode() const { return ANTIDEP_NONE; } + + // For use with PostRAScheduling: in CriticalPathRCs, return any register + // classes that should only be considered for anti-dependence breaking if they + // are on the critical path. + virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + return CriticalPathRCs.clear(); + } + + // \brief Provide an ordered list of schedule DAG mutations for the post-RA + // scheduler. + virtual void getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + } + + // \brief Provide an ordered list of schedule DAG mutations for the machine + // pipeliner. + virtual void getSMSMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + } + + // For use with PostRAScheduling: get the minimum optimization level needed + // to enable post-RA scheduling. + virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const { + return CodeGenOpt::Default; + } + + /// \brief True if the subtarget should run the local reassignment + /// heuristic of the register allocator. + /// This heuristic may be compile time intensive, \p OptLevel provides + /// a finer grain to tune the register allocator. + virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; + + /// \brief True if the subtarget should consider the cost of local intervals + /// created by a split candidate when choosing the best split candidate. This + /// heuristic may be compile time intensive. + virtual bool enableAdvancedRASplitCost() const; + + /// \brief Enable use of alias analysis during code generation (during MI + /// scheduling, DAGCombine, etc.). + virtual bool useAA() const; + + /// \brief Enable the use of the early if conversion pass. + virtual bool enableEarlyIfConversion() const { return false; } + + /// \brief Return PBQPConstraint(s) for the target. + /// + /// Override to provide custom PBQP constraints. + virtual std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const { + return nullptr; + } + + /// Enable tracking of subregister liveness in register allocator. 
+ /// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where + /// possible. + virtual bool enableSubRegLiveness() const { return false; } + + /// Returns string representation of scheduler comment + std::string getSchedInfoStr(const MachineInstr &MI) const override; + std::string getSchedInfoStr(MCInst const &MCI) const override; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index b1e62daa5aaeb..73c7fb4ce4b37 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,110 +40,111 @@ def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value -def v512i1 : ValueType<512, 21>; // 512 x i1 vector value -def v1024i1: ValueType<1024,22>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 23>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 24>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 25>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 26>; // 8 x i8 vector value -def v16i8 : ValueType<128, 27>; // 16 x i8 vector value -def v32i8 : ValueType<256, 28>; // 32 x i8 vector value -def v64i8 : ValueType<512, 29>; // 64 x i8 vector value -def v128i8 : ValueType<1024,30>; //128 x i8 vector value -def v256i8 : ValueType<2048,31>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 32>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 33>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 34>; // 4 x i16 vector value -def v8i16 : ValueType<128, 35>; // 8 x i16 vector value -def v16i16 : ValueType<256, 36>; // 16 x i16 vector value -def v32i16 : ValueType<512, 37>; // 32 x i16 vector value -def v64i16 : ValueType<1024,38>; // 64 x i16 vector value -def v128i16: ValueType<2048,39>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 40>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 41>; // 2 x i32 vector value -def v4i32 : ValueType<128, 42>; // 4 x i32 vector value -def v8i32 : ValueType<256, 43>; // 8 x i32 vector value -def v16i32 : ValueType<512, 44>; // 16 x i32 vector value -def v32i32 : ValueType<1024,45>; // 32 x i32 vector value -def v64i32 : ValueType<2048,46>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 47>; // 1 x i64 vector value -def v2i64 : ValueType<128, 48>; // 2 x i64 vector value -def v4i64 : ValueType<256, 49>; // 4 x i64 vector value -def v8i64 : ValueType<512, 50>; // 8 x i64 vector value -def v16i64 : ValueType<1024,51>; // 16 x i64 vector value -def v32i64 : ValueType<2048,52>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 53>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 54>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 55>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 56>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 57>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 58>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 59>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 60>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 61>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 62>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 63>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 64>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 65>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 66>; // n 
x 1 x i16 vector value -def nxv2i16 : ValueType<32, 67>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 68>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 69>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 70>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 71>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 72>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 73>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 74>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 75>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 76>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,77>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 78>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 79>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 80>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 81>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,82>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,83>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 84>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 85>; // 4 x f16 vector value -def v8f16 : ValueType<128, 86>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 87>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 88>; // 2 x f32 vector value -def v4f32 : ValueType<128, 89>; // 4 x f32 vector value -def v8f32 : ValueType<256, 90>; // 8 x f32 vector value -def v16f32 : ValueType<512, 91>; // 16 x f32 vector value -def v1f64 : ValueType<64, 92>; // 1 x f64 vector value -def v2f64 : ValueType<128, 93>; // 2 x f64 vector value -def v4f64 : ValueType<256, 94>; // 4 x f64 vector value -def v8f64 : ValueType<512, 95>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 96>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 97>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 98>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 99>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 100>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 101>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 102>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 103>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 104>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 105>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 106>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 107>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 108>; // X86 MMX value -def FlagVT : ValueType<0 , 109>; // Pre-RA sched glue -def isVoid : ValueType<0 , 110>; // Produces no value -def untyped: ValueType<8 , 111>; // Produces an untyped value +def v128i1 : ValueType<128, 21>; // 128 x i1 vector value +def v512i1 : ValueType<512, 22>; // 512 x i1 vector value +def v1024i1: ValueType<1024,23>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 24>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value +def v16i8 : ValueType<128, 28>; // 16 x i8 vector value +def v32i8 : ValueType<256, 29>; // 32 x i8 vector value +def v64i8 : ValueType<512, 30>; // 64 x i8 vector value +def v128i8 : ValueType<1024,31>; //128 x i8 vector value +def v256i8 : ValueType<2048,32>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 
34>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value +def v8i16 : ValueType<128, 36>; // 8 x i16 vector value +def v16i16 : ValueType<256, 37>; // 16 x i16 vector value +def v32i16 : ValueType<512, 38>; // 32 x i16 vector value +def v64i16 : ValueType<1024,39>; // 64 x i16 vector value +def v128i16: ValueType<2048,40>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value +def v4i32 : ValueType<128, 43>; // 4 x i32 vector value +def v8i32 : ValueType<256, 44>; // 8 x i32 vector value +def v16i32 : ValueType<512, 45>; // 16 x i32 vector value +def v32i32 : ValueType<1024,46>; // 32 x i32 vector value +def v64i32 : ValueType<2048,47>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value +def v2i64 : ValueType<128, 49>; // 2 x i64 vector value +def v4i64 : ValueType<256, 50>; // 4 x i64 vector value +def v8i64 : ValueType<512, 51>; // 8 x i64 vector value +def v16i64 : ValueType<1024,52>; // 16 x i64 vector value +def v32i64 : ValueType<2048,53>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 54>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 55>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 56>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 57>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 58>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 59>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 60>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 61>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 62>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 63>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 64>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 65>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 66>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 67>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 68>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 69>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 70>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 71>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 72>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 73>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 74>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 75>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 76>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 77>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,78>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 79>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 80>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 81>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 82>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,83>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,84>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 85>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 86>; // 4 x f16 vector value +def v8f16 : ValueType<128, 87>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 88>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 89>; // 2 x f32 vector value +def v4f32 : ValueType<128, 90>; // 4 x f32 vector value +def v8f32 : ValueType<256, 91>; // 8 x f32 vector value +def v16f32 : ValueType<512, 92>; // 16 x f32 vector value +def v1f64 : ValueType<64, 
93>; // 1 x f64 vector value +def v2f64 : ValueType<128, 94>; // 2 x f64 vector value +def v4f64 : ValueType<256, 95>; // 4 x f64 vector value +def v8f64 : ValueType<512, 96>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 97>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 98>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 99>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 100>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 101>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 102>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 103>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 104>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 105>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 106>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 107>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 108>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 109>; // X86 MMX value +def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue +def isVoid : ValueType<0 , 111>; // Produces no value +def untyped: ValueType<8 , 112>; // Produces an untyped value def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index b9076353fd07d..3b06f03931147 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +//===- llvm/CodeGen/VirtRegMap.h - Virtual Register Map ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,15 +19,17 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include <cassert> namespace llvm { - class MachineInstr; - class MachineFunction; - class MachineRegisterInfo; - class TargetInstrInfo; - class raw_ostream; - class SlotIndexes; + +class MachineFunction; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; class VirtRegMap : public MachineFunctionPass { public: @@ -63,13 +65,14 @@ namespace llvm { /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); - VirtRegMap(const VirtRegMap&) = delete; - void operator=(const VirtRegMap&) = delete; - public: static char ID; + VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {} + VirtRegMap(const VirtRegMap &) = delete; + VirtRegMap &operator=(const VirtRegMap &) = delete; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -166,6 +169,7 @@ namespace llvm { /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); + /// @brief create a mapping for the specified virtual register to /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); @@ -178,6 +182,7 @@ namespace llvm { VRM.print(OS); return OS; } -} // End llvm namespace -#endif +} // end llvm namespace + +#endif // LLVM_CODEGEN_VIRTREGMAP_H |
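(Editorial aside, not part of the patch: closing with a small sketch of the VirtRegMap interface shown in the last hunk together with printReg() from TargetRegisterInfo.h. The VRM reference is assumed to come from the pass manager, e.g. getAnalysis<VirtRegMap>() inside a MachineFunctionPass, and the function name is hypothetical.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include "llvm/CodeGen/VirtRegMap.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cassert>
  using namespace llvm;

  void spillVirtReg(VirtRegMap &VRM, unsigned VirtReg, raw_ostream &OS) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a vreg");
    int FI = VRM.assignVirt2StackSlot(VirtReg); // allocates a slot sized for the reg class
    OS << printReg(VirtReg) << " assigned to frame index " << FI << '\n';
    OS << VRM; // the operator<< declared in the hunk above prints the whole map
  }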