Diffstat (limited to 'include/llvm/CodeGen')
108 files changed, 5185 insertions, 3539 deletions
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h new file mode 100644 index 000000000000..13928582f2dd --- /dev/null +++ b/include/llvm/CodeGen/AccelTable.h @@ -0,0 +1,434 @@ +//==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DWARFACCELTABLE_H +#define LLVM_CODEGEN_DWARFACCELTABLE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cstddef> +#include <cstdint> +#include <vector> + +/// The DWARF and Apple accelerator tables are an indirect hash table optimized +/// for null lookup rather than access to known data. The Apple accelerator +/// tables are a precursor of the newer DWARF v5 accelerator tables. Both +/// formats share common design ideas. +/// +/// The Apple accelerator tables are output into an on-disk format that looks +/// like this: +/// +/// .------------------. +/// | HEADER | +/// |------------------| +/// | BUCKETS | +/// |------------------| +/// | HASHES | +/// |------------------| +/// | OFFSETS | +/// |------------------| +/// | DATA | +/// `------------------' +/// +/// The header contains a magic number, version, type of hash function, +/// the number of buckets, total number of hashes, and room for a special struct +/// of data and the length of that struct. +/// +/// The buckets contain an index (e.g. 6) into the hashes array. The hashes +/// section contains all of the 32-bit hash values in contiguous memory, and the +/// offsets contain the offset into the data area for the particular hash. +/// +/// For a lookup example, we could hash a function name and take it modulo the +/// number of buckets, giving us our bucket. From there we take the bucket value +/// as an index into the hashes table and look at each successive hash as long +/// as the hash value is still the same modulo result (bucket value) as earlier. +/// If we have a match we look at that same entry in the offsets table and grab +/// the offset in the data for our final match. +/// +/// The DWARF v5 accelerator table consists of zero or more name indices that +/// are output into an on-disk format that looks like this: +/// +/// .------------------. +/// | HEADER | +/// |------------------| +/// | CU LIST | +/// |------------------| +/// | LOCAL TU LIST | +/// |------------------| +/// | FOREIGN TU LIST | +/// |------------------| +/// | HASH TABLE | +/// |------------------| +/// | NAME TABLE | +/// |------------------| +/// | ABBREV TABLE | +/// |------------------| +/// | ENTRY POOL | +/// `------------------' +/// +/// For the full documentation please refer to the DWARF 5 standard.
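The lookup procedure described in the comment above is compact enough to sketch directly. The following C++ is an illustration only, not part of this patch: HashTableView and lookup are hypothetical names, and empty-bucket sentinels plus the exact on-disk byte layout are omitted. It simply walks the BUCKETS, HASHES and OFFSETS sections the way the comment describes.

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical in-memory view of the BUCKETS, HASHES and OFFSETS sections.
struct HashTableView {
  std::vector<uint32_t> Buckets; // per bucket: index of its first hash
  std::vector<uint32_t> Hashes;  // all 32-bit hash values, bucket-contiguous
  std::vector<uint32_t> Offsets; // offset into the DATA area for each hash
};

std::optional<uint32_t> lookup(const HashTableView &T, uint32_t Hash) {
  size_t Bucket = Hash % T.Buckets.size();
  // Scan successive hashes for as long as they still land in this bucket.
  for (size_t I = T.Buckets[Bucket];
       I < T.Hashes.size() && T.Hashes[I] % T.Buckets.size() == Bucket; ++I)
    if (T.Hashes[I] == Hash)
      return T.Offsets[I]; // the matching entry's offset in the DATA area
  return std::nullopt;
}

The hash function itself would be the DJB hash these tables use in practice (djbHash from llvm/Support/DJB.h, included by this header), applied to the name being looked up.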
+/// +/// +/// This file defines the class template AccelTable, which represents an +/// abstract view of an Accelerator table, without any notion of an on-disk +/// layout. This class is parameterized by an entry type, which should derive +/// from AccelTableData. This is the type of individual entries in the table, +/// and it should store the data necessary to emit them. AppleAccelTableData is +/// the base class for Apple Accelerator Table entries, which have a uniform +/// structure based on a sequence of Atoms. There are different sub-classes +/// derived from AppleAccelTableData, which differ in the set of Atoms and how they +/// obtain their values. +/// +/// An Apple Accelerator Table can be serialized by calling the emitAppleAccelTable +/// function. +/// +/// TODO: Add DWARF v5 emission code. + +namespace llvm { + +class AsmPrinter; +class DwarfCompileUnit; +class DwarfDebug; + +/// Interface to which the different types of accelerator table data have to +/// conform. It serves as a base class for different values of the template +/// argument of the AccelTable class template. +class AccelTableData { +public: + virtual ~AccelTableData() = default; + + bool operator<(const AccelTableData &Other) const { + return order() < Other.order(); + } + + // Subclasses should implement: + // static uint32_t hash(StringRef Name); + +#ifndef NDEBUG + virtual void print(raw_ostream &OS) const = 0; +#endif +protected: + virtual uint64_t order() const = 0; +}; + +/// A base class holding non-template-dependent functionality of the AccelTable +/// class. Clients should not use this class directly but rather instantiate +/// AccelTable with a type derived from AccelTableData. +class AccelTableBase { +public: + using HashFn = uint32_t(StringRef); + + /// Represents a group of entries with identical name (and hence, hash value). + struct HashData { + DwarfStringPoolEntryRef Name; + uint32_t HashValue; + std::vector<AccelTableData *> Values; + MCSymbol *Sym; + + HashData(DwarfStringPoolEntryRef Name, HashFn *Hash) + : Name(Name), HashValue(Hash(Name.getString())) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + using HashList = std::vector<HashData *>; + using BucketList = std::vector<HashList>; + +protected: + /// Allocator for HashData and Values. + BumpPtrAllocator Allocator; + + using StringEntries = StringMap<HashData, BumpPtrAllocator &>; + StringEntries Entries; + + HashFn *Hash; + uint32_t BucketCount; + uint32_t UniqueHashCount; + + HashList Hashes; + BucketList Buckets; + + void computeBucketCount(); + + AccelTableBase(HashFn *Hash) : Entries(Allocator), Hash(Hash) {} + +public: + void finalize(AsmPrinter *Asm, StringRef Prefix); + ArrayRef<HashList> getBuckets() const { return Buckets; } + uint32_t getBucketCount() const { return BucketCount; } + uint32_t getUniqueHashCount() const { return UniqueHashCount; } + uint32_t getUniqueNameCount() const { return Entries.size(); } + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + + AccelTableBase(const AccelTableBase &) = delete; + void operator=(const AccelTableBase &) = delete; +}; + +/// This class holds an abstract representation of an Accelerator Table, +/// consisting of a sequence of buckets, each bucket containing a sequence of +/// HashData entries. The class is parameterized by the type of entries it +/// holds. The type template parameter also defines the hash function to use for +/// hashing names.
+template <typename DataT> class AccelTable : public AccelTableBase { +public: + AccelTable() : AccelTableBase(DataT::hash) {} + + template <typename... Types> + void addName(DwarfStringPoolEntryRef Name, Types &&... Args); +}; + +template <typename AccelTableDataT> +template <typename... Types> +void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name, + Types &&... Args) { + assert(Buckets.empty() && "Already finalized!"); + // If the string is in the list already then add this DIE to the list, + // otherwise add a new one. + auto Iter = Entries.try_emplace(Name.getString(), Name, Hash).first; + assert(Iter->second.Name == Name); + Iter->second.Values.push_back( + new (Allocator) AccelTableDataT(std::forward<Types>(Args)...)); +} + +/// A base class for different implementations of Data classes for Apple +/// Accelerator Tables. The columns in the table are defined by the static Atoms +/// variable declared on the subclasses. +class AppleAccelTableData : public AccelTableData { +public: + /// An Atom defines the form of the data in an Apple accelerator table. + /// Conceptually it is a column in the accelerator table consisting of a type and a + /// specification of the form of its data. + struct Atom { + /// Atom Type. + const uint16_t Type; + /// DWARF Form. + const uint16_t Form; + + constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + // Subclasses should define: + // static constexpr Atom Atoms[]; + + virtual void emit(AsmPrinter *Asm) const = 0; + + static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); } +}; + +/// The Data class implementation for the DWARF v5 accelerator table. Unlike the +/// Apple Data classes, this class is just a DIE wrapper, and does not know how to +/// serialize itself. The complete serialization logic is in the +/// emitDWARF5AccelTable function. +class DWARF5AccelTableData : public AccelTableData { +public: + static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } + + DWARF5AccelTableData(const DIE &Die) : Die(Die) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif + + const DIE &getDie() const { return Die; } + uint64_t getDieOffset() const { return Die.getOffset(); } + unsigned getDieTag() const { return Die.getTag(); } + +protected: + const DIE &Die; + + uint64_t order() const override { return Die.getOffset(); } +}; + +class DWARF5AccelTableStaticData : public AccelTableData { +public: + static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } + + DWARF5AccelTableStaticData(uint64_t DieOffset, unsigned DieTag, + unsigned CUIndex) + : DieOffset(DieOffset), DieTag(DieTag), CUIndex(CUIndex) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif + + uint64_t getDieOffset() const { return DieOffset; } + unsigned getDieTag() const { return DieTag; } + unsigned getCUIndex() const { return CUIndex; } + +protected: + uint64_t DieOffset; + unsigned DieTag; + unsigned CUIndex; + + uint64_t order() const override { return DieOffset; } +}; + +void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, + StringRef Prefix, const MCSymbol *SecBegin, + ArrayRef<AppleAccelTableData::Atom> Atoms); + +/// Emit an Apple Accelerator Table consisting of entries in the specified +/// AccelTable. The DataT template parameter should be derived from +/// AppleAccelTableData.
+template <typename DataT> +void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents, + StringRef Prefix, const MCSymbol *SecBegin) { + static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value, ""); + emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms); +} + +void emitDWARF5AccelTable(AsmPrinter *Asm, + AccelTable<DWARF5AccelTableData> &Contents, + const DwarfDebug &DD, + ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs); + +void emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents, + ArrayRef<MCSymbol *> CUs, + llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)> + getCUIndexForEntry); + +/// Accelerator table data implementation for simple Apple accelerator tables +/// with just a DIE reference. +class AppleAccelTableOffsetData : public AppleAccelTableData { +public: + AppleAccelTableOffsetData(const DIE &D) : Die(D) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Die.getOffset(); } + + const DIE &Die; +}; + +/// Accelerator table data implementation for Apple type accelerator tables. +class AppleAccelTableTypeData : public AppleAccelTableOffsetData { +public: + AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +}; + +/// Accelerator table data implementation for simple Apple accelerator tables +/// with a DIE offset but no actual DIE pointer. +class AppleAccelTableStaticOffsetData : public AppleAccelTableData { +public: + AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Offset; } + + uint32_t Offset; +}; + +/// Accelerator table data implementation for type accelerator tables with +/// a DIE offset but no actual DIE pointer.
+class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData { +public: + AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag, + bool ObjCClassIsImplementation, + uint32_t QualifiedNameHash) + : AppleAccelTableStaticOffsetData(Offset), + QualifiedNameHash(QualifiedNameHash), Tag(Tag), + ObjCClassIsImplementation(ObjCClassIsImplementation) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Offset; } + + uint32_t QualifiedNameHash; + uint16_t Tag; + bool ObjCClassIsImplementation; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_DWARFACCELTABLE_H diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index ba88f1f78fb8..d77aee66ed76 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -36,7 +36,7 @@ class SDValue; class SelectionDAG; struct EVT; -/// \brief Compute the linearized index of a member in a nested +/// Compute the linearized index of a member in a nested /// aggregate/struct/array. /// /// Given an LLVM IR aggregate type and a sequence of insertvalue or @@ -124,7 +124,7 @@ bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I, const TargetLoweringBase &TLI); DenseMap<const MachineBasicBlock *, int> -getFuncletMembership(const MachineFunction &MF); +getEHScopeMembership(const MachineFunction &MF); } // End llvm namespace diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index b8944a668000..b6056380916c 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -50,6 +50,7 @@ class GlobalValue; class GlobalVariable; class MachineBasicBlock; class MachineConstantPoolValue; +class MachineDominatorTree; class MachineFunction; class MachineInstr; class MachineJumpTableInfo; @@ -92,11 +93,17 @@ public: std::unique_ptr<MCStreamer> OutStreamer; /// The current machine function. - const MachineFunction *MF = nullptr; + MachineFunction *MF = nullptr; /// This is a pointer to the current MachineModuleInfo. MachineModuleInfo *MMI = nullptr; + /// This is a pointer to the current MachineDominatorTree. + MachineDominatorTree *MDT = nullptr; + + /// This is a pointer to the current MachineLoopInfo. + MachineLoopInfo *MLI = nullptr; + /// Optimization remark emitter. MachineOptimizationRemarkEmitter *ORE; @@ -130,9 +137,6 @@ private: static char ID; - /// If VerboseAsm is set, a pointer to the loop info for this function. - MachineLoopInfo *LI = nullptr; - struct HandlerInfo { AsmPrinterHandler *Handler; const char *TimerName; @@ -161,6 +165,12 @@ public: }; private: + /// If generated on the fly this owns the instance. + std::unique_ptr<MachineDominatorTree> OwnedMDT; + + /// If generated on the fly this owns the instance. + std::unique_ptr<MachineLoopInfo> OwnedMLI; + /// Structure for generating diagnostics for inline assembly. Only initialised /// when necessary.
mutable std::unique_ptr<SrcMgrDiagInfo> DiagInfo; @@ -191,6 +201,10 @@ public: /// Return a unique ID for the current function. unsigned getFunctionNumber() const; + /// Return symbol for the function pseudo stack if the stack frame is not a + /// register based. + virtual const MCSymbol *getFunctionFrameSymbol() const { return nullptr; } + MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } MCSymbol *getFunctionEnd() const { return CurrentFnEnd; } MCSymbol *getCurExceptionSym(); @@ -228,6 +242,7 @@ public: TAIL_CALL = 2, LOG_ARGS_ENTER = 3, CUSTOM_EVENT = 4, + TYPED_EVENT = 5, }; // The table will contain these structs that point to the sled, the function @@ -327,15 +342,15 @@ public: /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; + void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. virtual const MCExpr *lowerConstant(const Constant *CV); - /// \brief Print a general LLVM constant to the .s file. + /// Print a general LLVM constant to the .s file. void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); - /// \brief Unnamed constant global variables solely contaning a pointer to + /// Unnamed constant global variables solely contaning a pointer to /// another globals variable act like a global variable "proxy", or GOT /// equivalents, i.e., it's only used to hold the address of the latter. One /// optimization is to replace accesses to these proxies by using the GOT @@ -345,7 +360,7 @@ public: /// accesses to GOT entries. void computeGlobalGOTEquivs(Module &M); - /// \brief Constant expressions using GOT equivalent globals may not be + /// Constant expressions using GOT equivalent globals may not be /// eligible for PC relative GOT entry conversion, in such cases we need to /// emit the proxies we previously omitted in EmitGlobalVariable. void emitGlobalGOTEquivs(); @@ -444,13 +459,16 @@ public: void printOffset(int64_t Offset, raw_ostream &OS) const; /// Emit a byte directive and value. - void EmitInt8(int Value) const; + void emitInt8(int Value) const; /// Emit a short directive and value. - void EmitInt16(int Value) const; + void emitInt16(int Value) const; /// Emit a long directive and value. - void EmitInt32(int Value) const; + void emitInt32(int Value) const; + + /// Emit a long long directive and value. + void emitInt64(uint64_t Value) const; /// Emit something like ".long Hi-Lo" where the size in bytes of the directive /// is specified by Size and Hi/Lo specify the labels. This implicitly uses @@ -458,6 +476,10 @@ public: void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const; + /// Emit something like ".uleb128 Hi-Lo". + void EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const; + /// Emit something like ".long Label+Offset" where the size in bytes of the /// directive is specified by Size and Label specifies the label. This /// implicitly uses .set if it is available. @@ -471,6 +493,9 @@ public: EmitLabelPlusOffset(Label, 0, Size, IsSectionRelative); } + /// Emit something like ".long Label + Offset". 
+ void EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; + //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// @@ -481,11 +506,6 @@ public: /// Emit the specified unsigned leb128 value. void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const; - /// Emit the specified unsigned leb128 value padded to a specific number - /// bytes - void EmitPaddedULEB128(uint64_t Value, unsigned PadTo, - const char *Desc = nullptr) const; - /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. /// Desc is a string saying what the encoding is specifying (e.g. "LSDA"). @@ -508,7 +528,12 @@ public: /// When possible, emit a DwarfStringPool section offset without any /// relocations, and without using the symbol. Otherwise, defers to \a /// emitDwarfSymbolReference(). - void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const; + void emitDwarfStringOffset(DwarfStringPoolEntry S) const; + + /// Emit the 4-byte offset of a string from the start of its section. + void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + emitDwarfStringOffset(S.getEntry()); + } /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified. virtual unsigned getISAEncoding() { return 0; } @@ -523,10 +548,10 @@ public: // Dwarf Lowering Routines //===------------------------------------------------------------------===// - /// \brief Emit frame instruction to describe the layout of the frame. + /// Emit frame instruction to describe the layout of the frame. void emitCFIInstruction(const MCCFIInstruction &Inst) const; - /// \brief Emit Dwarf abbreviation table. + /// Emit Dwarf abbreviation table. template <typename T> void emitDwarfAbbrevs(const T &Abbrevs) const { // For each abbreviation. for (const auto &Abbrev : Abbrevs) @@ -538,7 +563,7 @@ public: void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const; - /// \brief Recursively emit Dwarf DIE tree. + /// Recursively emit Dwarf DIE tree. void emitDwarfDIE(const DIE &Die) const; //===------------------------------------------------------------------===// @@ -625,10 +650,9 @@ private: void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); - GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); + GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); /// Emit GlobalAlias or GlobalIFunc. - void emitGlobalIndirectSymbol(Module &M, - const GlobalIndirectSymbol& GIS); + void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); void setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const; }; diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h index 1f9c96b18e1b..b1adf66e7ff4 100644 --- a/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/include/llvm/CodeGen/AtomicExpandUtils.h @@ -26,7 +26,7 @@ using CreateCmpXchgInstFun = function_ref<void(IRBuilder<> &, Value *, Value *, Value *, AtomicOrdering, Value *&, Value *&)>; -/// \brief Expand an atomic RMW instruction into a loop utilizing +/// Expand an atomic RMW instruction into a loop utilizing /// cmpxchg. You'll want to make sure your target machine likes cmpxchg /// instructions in the first place and that there isn't another, better, /// transformation available (for example AArch32/AArch64 have linked loads). 
@@ -58,7 +58,7 @@ using CreateCmpXchgInstFun = /// [...] /// /// Returns true if the containing function was modified. -bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); } // end namespace llvm diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 526ddb1b9706..f76a2426377a 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -26,7 +26,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -47,6 +46,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> @@ -65,7 +65,7 @@ class TargetMachine; extern cl::opt<unsigned> PartialUnrollingThreshold; -/// \brief Base class which can be used to help build a TTI implementation. +/// Base class which can be used to help build a TTI implementation. /// /// This class provides as much implementation of the TTI interface as is /// possible using the target independent parts of the code generator. @@ -101,16 +101,32 @@ private: return Cost; } - /// \brief Local query method delegates up to T which *must* implement this! + /// Local query method delegates up to T which *must* implement this! const TargetSubtargetInfo *getST() const { return static_cast<const T *>(this)->getST(); } - /// \brief Local query method delegates up to T which *must* implement this! + /// Local query method delegates up to T which *must* implement this! const TargetLoweringBase *getTLI() const { return static_cast<const T *>(this)->getTLI(); } + static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { + switch (M) { + case TTI::MIM_Unindexed: + return ISD::UNINDEXED; + case TTI::MIM_PreInc: + return ISD::PRE_INC; + case TTI::MIM_PreDec: + return ISD::PRE_DEC; + case TTI::MIM_PostInc: + return ISD::POST_INC; + case TTI::MIM_PostDec: + return ISD::POST_DEC; + } + llvm_unreachable("Unexpected MemIndexedMode"); + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -157,6 +173,18 @@ public: return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } + bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, + const DataLayout &DL) const { + EVT VT = getTLI()->getValueType(DL, Ty); + return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); + } + + bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, + const DataLayout &DL) const { + EVT VT = getTLI()->getValueType(DL, Ty); + return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); + } + bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } @@ -179,6 +207,8 @@ public: return getTLI()->isProfitableToHoist(I); } + bool useAA() const { return getST()->useAA(); } + bool isTypeLegal(Type *Ty) { EVT VT = getTLI()->getValueType(DL, Ty); return getTLI()->isTypeLegal(VT); @@ -240,7 +270,7 @@ public: bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); // Early exit if both a jump table and bit test are not allowed. 
- if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N)) + if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) return N; APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); @@ -254,7 +284,7 @@ public: } // Check if suitable for a bit test - if (N <= DL.getPointerSizeInBits()) { + if (N <= DL.getIndexSizeInBits(0u)) { SmallPtrSet<const BasicBlock *, 4> Dests; for (auto I : SI.cases()) Dests.insert(I.getCaseSuccessor()); @@ -523,11 +553,15 @@ public: unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc || - Kind == TTI::SK_PermuteSingleSrc) { + switch (Kind) { + case TTI::SK_Select: + case TTI::SK_Transpose: + case TTI::SK_PermuteSingleSrc: + case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(Tp); + default: + return 1; } - return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -614,7 +648,7 @@ public: } // If we are legalizing by splitting, query the concrete TTI for the cost - // of casting the original vector twice. We also need to factor int the + // of casting the original vector twice. We also need to factor in the // cost of the split itself. Count that as 1, to be consistent with // TLI->getTypeLegalizationCost(). if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == @@ -916,6 +950,20 @@ public: RetTy, Args[0], VarMask, Alignment); } + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); } } @@ -1039,6 +1087,39 @@ public: case Intrinsic::masked_load: return static_cast<T *>(this) ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); + case Intrinsic::experimental_vector_reduce_add: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Add, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_mul: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_and: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::And, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_or: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Or, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_xor: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_fadd: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_fmul: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_smax: + case 
Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + return static_cast<T *>(this)->getMinMaxReductionCost( + Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, + /*IsSigned=*/true); + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return static_cast<T *>(this)->getMinMaxReductionCost( + Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, + /*IsSigned=*/false); case Intrinsic::ctpop: ISDs.push_back(ISD::CTPOP); // In case of legalization use TCC_Expensive. This is cheaper than a @@ -1123,7 +1204,7 @@ public: return SingleCallCost; } - /// \brief Compute a cost of the given call instruction. + /// Compute a cost of the given call instruction. /// /// Compute the cost of calling function F with return type RetTy and /// argument types Tys. F might be nullptr, in this case the cost of an @@ -1284,7 +1365,7 @@ public: /// @} }; -/// \brief Concrete BasicTTIImpl that can be used if no further customization +/// Concrete BasicTTIImpl that can be used if no further customization /// is needed. class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { using BaseT = BasicTTIImplBase<BasicTTIImpl>; @@ -1298,7 +1379,7 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { const TargetLoweringBase *getTLI() const { return TLI; } public: - explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); + explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index d9e8206408a7..f85767f1fc11 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -22,7 +22,7 @@ class MachineFunction; class MachineLoopInfo; class VirtRegMap; - /// \brief Normalize the spill weight of a live interval + /// Normalize the spill weight of a live interval /// /// The spill weight of a live interval is computed as: /// @@ -42,7 +42,7 @@ class VirtRegMap; return UseDefFreq / (Size + 25*SlotIndex::InstrDist); } - /// \brief Calculate auxiliary information for a virtual register such as its + /// Calculate auxiliary information for a virtual register such as its /// spill weight and allocation hint. class VirtRegAuxInfo { public: @@ -64,10 +64,10 @@ class VirtRegMap; NormalizingFn norm = normalizeSpillWeight) : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} - /// \brief (re)compute li's spill weight and allocation hint. + /// (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); - /// \brief Compute future expected spill weight of a split artifact of li + /// Compute future expected spill weight of a split artifact of li /// that will span between start and end slot indexes. /// \param li The live interval to be split. /// \param start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. /// \param end The expected end of the split artifact. Instructions /// after end will not affect the weight. /// \return The expected spill weight of the split artifact. Returns /// negative weight for unspillable li. float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); - /// \brief Helper function for weight calculations. + /// Helper function for weight calculations. /// (Re)compute li's spill weight and allocation hint, or, for non-null /// start and end - compute future expected spill weight of a split /// artifact of li that will span between start and end slot indexes.
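The normalization expression visible in the hunk above, return UseDefFreq / (Size + 25*SlotIndex::InstrDist), is easy to ground with numbers. The C++ below is an illustration only, not part of this patch; the frequency and size inputs are made up, and InstrDist is passed as a parameter instead of being read from SlotIndexes.

#include <cstdio>

// Mirrors the normalizeSpillWeight expression quoted above. The
// 25 * InstrDist term acts as a fixed 25-instruction "tax" in the
// denominator that keeps very short intervals from getting
// disproportionately large spill weights.
static float normalize(float UseDefFreq, unsigned Size, unsigned InstrDist) {
  return UseDefFreq / (Size + 25 * InstrDist);
}

int main() {
  const unsigned InstrDist = 4; // made-up slot distance between instructions
  // Same use/def frequency, but the interval shrinks from 100 to 50
  // instructions: the weight rises, i.e. a shorter interval with equal
  // traffic is more expensive to spill.
  std::printf("%f\n", normalize(800.0f, 100 * InstrDist, InstrDist));
  std::printf("%f\n", normalize(800.0f, 50 * InstrDist, InstrDist));
}

Note that the denominator is in slot-index units, which is why Size is expressed here as an instruction count multiplied by InstrDist.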
@@ -94,7 +94,7 @@ class VirtRegMap; SlotIndex *end = nullptr); }; - /// \brief Compute spill weights and allocation hints for all virtual register + /// Compute spill weights and allocation hints for all virtual register /// live intervals. void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, VirtRegMap *VRM, diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index d30a27328c01..efcf80ba0b4e 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -304,7 +304,7 @@ public: /// CheckReturn - Analyze the return values of a function, returning /// true if the return can be performed without sret-demotion, and /// false otherwise. - bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, + bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn); /// AnalyzeCallOperands - Analyze the outgoing arguments to a call, diff --git a/include/llvm/CodeGen/CommandFlags.def b/include/llvm/CodeGen/CommandFlags.inc index fe96033a9c61..7d2d167289e0 100644 --- a/include/llvm/CodeGen/CommandFlags.def +++ b/include/llvm/CodeGen/CommandFlags.inc @@ -17,7 +17,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.def" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -98,7 +98,9 @@ static cl::opt<llvm::ExceptionHandling> ExceptionModel( clEnumValN(ExceptionHandling::SjLj, "sjlj", "SjLj exception handling"), clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), clEnumValN(ExceptionHandling::WinEH, "wineh", - "Windows exception model"))); + "Windows exception model"), + clEnumValN(ExceptionHandling::Wasm, "wasm", + "WebAssembly exception handling"))); static cl::opt<TargetMachine::CodeGenFileType> FileType( "filetype", cl::init(TargetMachine::CGFT_AssemblyFile), @@ -259,6 +261,10 @@ static cl::opt<bool> EnableStackSizeSection( "stack-size-section", cl::desc("Emit a section containing stack size metadata"), cl::init(false)); +static cl::opt<bool> + EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"), + cl::init(false)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. static TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -284,8 +290,10 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; Options.EmulatedTLS = EmulatedTLS; + Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0; Options.ExceptionModel = ExceptionModel; Options.EmitStackSizeSection = EnableStackSizeSection; + Options.EmitAddrsig = EnableAddrsig; Options.MCOptions = InitMCTargetOptionsFromFlags(); @@ -326,7 +334,27 @@ LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() { return Features.getString(); } -/// \brief Set function attributes of functions in Module M based on CPU, +LLVM_ATTRIBUTE_UNUSED static std::vector<std::string> getFeatureList() { + SubtargetFeatures Features; + + // If the user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX.
+ if (MCPU == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + + return Features.getFeatures(); +} + +/// Set function attributes of functions in Module M based on CPU, /// Features, and command line flags. LLVM_ATTRIBUTE_UNUSED static void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h index 5a6368c5a0f8..48ad76971520 100644 --- a/include/llvm/CodeGen/CostTable.h +++ b/include/llvm/CodeGen/CostTable.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Cost tables and simple lookup functions +/// Cost tables and simple lookup functions /// //===----------------------------------------------------------------------===// @@ -17,7 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index f809fc97fe59..7d486b1df56d 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -136,7 +136,7 @@ class DIEAbbrevSet { /// The bump allocator to use when creating DIEAbbrev objects in the uniqued /// storage container. BumpPtrAllocator &Alloc; - /// \brief FoldingSet that uniques the abbreviations. + /// FoldingSet that uniques the abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; /// A list of all the unique abbreviations in use. std::vector<DIEAbbrev *> Abbreviations; @@ -190,7 +190,7 @@ public: uint64_t getValue() const { return Integer; } void setValue(uint64_t Val) { Integer = Val; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; @@ -868,7 +868,7 @@ public: return dwarf::DW_FORM_block; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; @@ -899,7 +899,7 @@ public: return dwarf::DW_FORM_block; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; diff --git a/include/llvm/CodeGen/DwarfStringPoolEntry.h b/include/llvm/CodeGen/DwarfStringPoolEntry.h index fc2b5ddd2d2c..e6c0483cfc35 100644 --- a/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -41,6 +41,8 @@ public: unsigned getOffset() const { return I->second.Offset; } unsigned getIndex() const { return I->second.Index; } StringRef getString() const { return I->first(); } + /// Return the entire string pool entry for convenience. 
+ DwarfStringPoolEntry getEntry() const { return I->getValue(); } bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; } bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; } diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h deleted file mode 100644 index f4db8b7322da..000000000000 --- a/include/llvm/CodeGen/ExecutionDepsFix.h +++ /dev/null @@ -1,230 +0,0 @@ -//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file Execution Dependency Fix pass. -/// -/// Some X86 SSE instructions like mov, and, or, xor are available in different -/// variants for different operand types. These variant instructions are -/// equivalent, but on Nehalem and newer cpus there is extra latency -/// transferring data between integer and floating point domains. ARM cores -/// have similar issues when they are configured with both VFP and NEON -/// pipelines. -/// -/// This pass changes the variant instructions to minimize domain crossings. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H -#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/Pass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> -#include <limits> -#include <utility> -#include <vector> - -namespace llvm { - -class MachineBasicBlock; -class MachineInstr; -class TargetInstrInfo; - -/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track -/// of execution domains. -/// -/// An open DomainValue represents a set of instructions that can still switch -/// execution domain. Multiple registers may refer to the same open -/// DomainValue - they will eventually be collapsed to the same execution -/// domain. -/// -/// A collapsed DomainValue represents a single register that has been forced -/// into one of more execution domains. There is a separate collapsed -/// DomainValue for each register, but it may contain multiple execution -/// domains. A register value is initially created in a single execution -/// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact that the register is now available in multiple -/// domains. -struct DomainValue { - // Basic reference counting. - unsigned Refs = 0; - - // Bitmask of available domains. For an open DomainValue, it is the still - // possible domains for collapsing. For a collapsed DomainValue it is the - // domains where the register is available for free. - unsigned AvailableDomains; - - // Pointer to the next DomainValue in a chain. When two DomainValues are - // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by following the chain. - DomainValue *Next; - - // Twiddleable instructions using or defining these registers. 
- SmallVector<MachineInstr*, 8> Instrs; - - DomainValue() { clear(); } - - // A collapsed DomainValue has no instructions to twiddle - it simply keeps - // track of the domains where the registers are already available. - bool isCollapsed() const { return Instrs.empty(); } - - // Is domain available? - bool hasDomain(unsigned domain) const { - assert(domain < - static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && - "undefined behavior"); - return AvailableDomains & (1u << domain); - } - - // Mark domain as available. - void addDomain(unsigned domain) { - AvailableDomains |= 1u << domain; - } - - // Restrict to a single domain available. - void setSingleDomain(unsigned domain) { - AvailableDomains = 1u << domain; - } - - // Return bitmask of domains that are available and in mask. - unsigned getCommonDomains(unsigned mask) const { - return AvailableDomains & mask; - } - - // First domain available. - unsigned getFirstDomain() const { - return countTrailingZeros(AvailableDomains); - } - - // Clear this DomainValue and point to next which has all its data. - void clear() { - AvailableDomains = 0; - Next = nullptr; - Instrs.clear(); - } -}; - -/// Information about a live register. -struct LiveReg { - /// Value currently in this register, or NULL when no value is being tracked. - /// This counts as a DomainValue reference. - DomainValue *Value; - - /// Instruction that defined this register, relative to the beginning of the - /// current basic block. When a LiveReg is used to represent a live-out - /// register, this value is relative to the end of the basic block, so it - /// will be a negative number. - int Def; -}; - -class ExecutionDepsFix : public MachineFunctionPass { - SpecificBumpPtrAllocator<DomainValue> Allocator; - SmallVector<DomainValue*,16> Avail; - - const TargetRegisterClass *const RC; - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RegClassInfo; - std::vector<SmallVector<int, 1>> AliasMap; - const unsigned NumRegs; - LiveReg *LiveRegs; - struct MBBInfo { - // Keeps clearance and domain information for all registers. Note that this - // is different from the usual definition notion of liveness. The CPU - // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs = nullptr; - - // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted = false; - - // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed = 0; - - // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming = 0; - - // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted = 0; - - MBBInfo() = default; - }; - using MBBInfoMap = DenseMap<MachineBasicBlock *, MBBInfo>; - MBBInfoMap MBBInfos; - - /// List of undefined register reads in this block in forward order. - std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; - - /// Storage for register unit liveness. - LivePhysRegs LiveRegSet; - - /// Current instruction number. - /// The first instruction in each basic block is 0. 
- int CurInstr; - -public: - ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) - : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - -private: - iterator_range<SmallVectorImpl<int>::const_iterator> - regIndices(unsigned Reg) const; - // DomainValue allocation. - DomainValue *alloc(int domain = -1); - DomainValue *retain(DomainValue *DV) { - if (DV) ++DV->Refs; - return DV; - } - void release(DomainValue*); - DomainValue *resolve(DomainValue*&); - - // LiveRegs manipulations. - void setLiveReg(int rx, DomainValue *DV); - void kill(int rx); - void force(int rx, unsigned domain); - void collapse(DomainValue *dv, unsigned domain); - bool merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(MachineBasicBlock*); - void leaveBasicBlock(MachineBasicBlock*); - bool isBlockDone(MachineBasicBlock *); - void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); - bool visitInstr(MachineInstr *); - void processDefs(MachineInstr *, bool breakDependency, bool Kill); - void visitSoftInstr(MachineInstr*, unsigned mask); - void visitHardInstr(MachineInstr*, unsigned domain); - bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, - unsigned Pref); - bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); - void processUndefReads(MachineBasicBlock*); -}; - -} // end namepsace llvm - -#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/include/llvm/CodeGen/ExecutionDomainFix.h b/include/llvm/CodeGen/ExecutionDomainFix.h new file mode 100644 index 000000000000..338c214dd073 --- /dev/null +++ b/include/llvm/CodeGen/ExecutionDomainFix.h @@ -0,0 +1,213 @@ +//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Execution Domain Fix pass. +/// +/// Some X86 SSE instructions like mov, and, or, xor are available in different +/// variants for different operand types. These variant instructions are +/// equivalent, but on Nehalem and newer cpus there is extra latency +/// transferring data between integer and floating point domains. ARM cores +/// have similar issues when they are configured with both VFP and NEON +/// pipelines. +/// +/// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H +#define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. 
Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one or more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + /// Basic reference counting. + unsigned Refs = 0; + + /// Bitmask of available domains. For an open DomainValue, it is the domains + /// still possible for collapsing. For a collapsed DomainValue it is the + /// domains where the register is available for free. + unsigned AvailableDomains; + + /// Pointer to the next DomainValue in a chain. When two DomainValues are + /// merged, Victim.Next is set to point to Victor, so old DomainValue + /// references can be updated by following the chain. + DomainValue *Next; + + /// Twiddleable instructions using or defining these registers. + SmallVector<MachineInstr *, 8> Instrs; + + DomainValue() { clear(); } + + /// A collapsed DomainValue has no instructions to twiddle - it simply keeps + /// track of the domains where the registers are already available. + bool isCollapsed() const { return Instrs.empty(); } + + /// Is domain available? + bool hasDomain(unsigned domain) const { + assert(domain < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + return AvailableDomains & (1u << domain); + } + + /// Mark domain as available. + void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; } + + /// Restrict to a single domain available. + void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; } + + /// Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + /// First domain available. + unsigned getFirstDomain() const { + return countTrailingZeros(AvailableDomains); + } + + /// Clear this DomainValue and point to next which has all its data. + void clear() { + AvailableDomains = 0; + Next = nullptr; + Instrs.clear(); + } +}; + +class ExecutionDomainFix : public MachineFunctionPass { + SpecificBumpPtrAllocator<DomainValue> Allocator; + SmallVector<DomainValue *, 16> Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + std::vector<SmallVector<int, 1>> AliasMap; + const unsigned NumRegs; + /// Value currently in each register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + using LiveRegsDVInfo = std::vector<DomainValue *>; + LiveRegsDVInfo LiveRegs; + /// Keeps domain information for all registers. Note that this + /// is different from the usual notion of liveness. The CPU + /// doesn't care whether or not we consider a register killed.
+ using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>; + OutRegsInfoMap MBBOutRegsInfos; + + ReachingDefAnalysis *RDA; + +public: + ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC) + : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<ReachingDefAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Translate TRI register number to a list of indices into our smaller tables + /// of interesting registers. + iterator_range<SmallVectorImpl<int>::const_iterator> + regIndices(unsigned Reg) const; + + /// DomainValue allocation. + DomainValue *alloc(int domain = -1); + + /// Add reference to DV. + DomainValue *retain(DomainValue *DV) { + if (DV) + ++DV->Refs; + return DV; + } + + /// Release a reference to DV. When the last reference is released, + /// collapse if needed. + void release(DomainValue *); + + /// Follow the chain of dead DomainValues until a live DomainValue is reached. + /// Update the referenced pointer when necessary. + DomainValue *resolve(DomainValue *&); + + /// Set LiveRegs[rx] = dv, updating reference counts. + void setLiveReg(int rx, DomainValue *DV); + + /// Kill register rx, recycle or collapse any DomainValue. + void kill(int rx); + + /// Force register rx into domain. + void force(int rx, unsigned domain); + + /// Collapse open DomainValue into given domain. If there are multiple + /// registers using dv, they each get a unique collapsed DomainValue. + void collapse(DomainValue *dv, unsigned domain); + + /// All instructions and registers in B are moved to A, and B is released. + bool merge(DomainValue *A, DomainValue *B); + + /// Set up LiveRegs by merging predecessor live-out values. + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update live-out values. + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Process the given basic block. + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Visit the given instruction. + bool visitInstr(MachineInstr *); + + /// Update def-ages for registers defined by MI. + /// If Kill is set, also kill off DomainValues clobbered by the defs. + void processDefs(MachineInstr *, bool Kill); + + /// A soft instruction can be changed to work in other domains given by mask. + void visitSoftInstr(MachineInstr *, unsigned mask); + + /// A hard instruction only works in one domain. All input registers will be + /// forced into that domain.
+ void visitHardInstr(MachineInstr *, unsigned domain); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 85bb826dcb8c..865d8a88b8cc 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" @@ -28,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cstdint> #include <utility> @@ -61,7 +61,7 @@ class Type; class User; class Value; -/// \brief This is a fast-path instruction selection class that generates poor +/// This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. class FastISel { @@ -78,7 +78,7 @@ public: bool IsReturnValueUsed : 1; bool IsPatchPoint : 1; - // \brief IsTailCall Should be modified by implementations of FastLowerCall + // IsTailCall Should be modified by implementations of FastLowerCall // that perform tail call conversions. bool IsTailCall = false; @@ -215,67 +215,74 @@ protected: const TargetLibraryInfo *LibInfo; bool SkipTargetIndependentISel; - /// \brief The position of the last instruction for materializing constants + /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it makes sense /// (for example, it's usually profitable to avoid function calls between the /// definition and the use) MachineInstr *LastLocalValue; - /// \brief The top most instruction in the current block that is allowed for + /// The top most instruction in the current block that is allowed for /// emitting local variables. LastLocalValue resets to EmitStartPt when it /// makes sense (for example, on function calls) MachineInstr *EmitStartPt; + /// Last local value flush point. On a subsequent flush, no local value will + /// sink past this point. + MachineBasicBlock::iterator LastFlushPoint; + public: virtual ~FastISel(); - /// \brief Return the position of the last instruction emitted for + /// Return the position of the last instruction emitted for /// materializing constants for use in the current block. MachineInstr *getLastLocalValue() { return LastLocalValue; } - /// \brief Update the position of the last instruction emitted for + /// Update the position of the last instruction emitted for /// materializing constants for use in the current block. void setLastLocalValue(MachineInstr *I) { EmitStartPt = I; LastLocalValue = I; } - /// \brief Set the current block to which generated machine instructions will - /// be appended, and clear the local CSE map. + /// Set the current block to which generated machine instructions will + /// be appended. void startNewBlock(); - /// \brief Return current debug location information. + /// Flush the local value map and sink local values if possible. + void finishBasicBlock(); + + /// Return current debug location information. 
DebugLoc getCurDebugLoc() const { return DbgLoc; } - /// \brief Do "fast" instruction selection for function arguments and append + /// Do "fast" instruction selection for function arguments and append /// the machine instructions to the current block. Returns true when /// successful. bool lowerArguments(); - /// \brief Do "fast" instruction selection for the given LLVM IR instruction + /// Do "fast" instruction selection for the given LLVM IR instruction /// and append the generated machine instructions to the current block. /// Returns true if selection was successful. bool selectInstruction(const Instruction *I); - /// \brief Do "fast" instruction selection for the given LLVM IR operator + /// Do "fast" instruction selection for the given LLVM IR operator /// (Instruction or ConstantExpr), and append generated machine instructions /// to the current block. Return true if selection was successful. bool selectOperator(const User *I, unsigned Opcode); - /// \brief Create a virtual register and arrange for it to be assigned the + /// Create a virtual register and arrange for it to be assigned the /// value for the given LLVM value. unsigned getRegForValue(const Value *V); - /// \brief Look up the value to see if its value is already cached in a + /// Look up the value to see if its value is already cached in a /// register. It may be defined by instructions across blocks or defined /// locally. unsigned lookUpRegForValue(const Value *V); - /// \brief This is a wrapper around getRegForValue that also takes care of + /// This is a wrapper around getRegForValue that also takes care of /// truncating or sign-extending the given getelementptr index value. - std::pair<unsigned, bool> getRegForGEPIndex(const Value *V); + std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); - /// \brief We're checking to see if we can fold \p LI into \p FoldInst. Note + /// We're checking to see if we can fold \p LI into \p FoldInst. Note /// that we could have a sequence where multiple LLVM IR instructions are /// folded into the same machineinstr. For example we could have: /// @@ -289,7 +296,7 @@ public: /// If we succeed folding, return true. bool tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst); - /// \brief The specified machine instr operand is a vreg, and that vreg is + /// The specified machine instr operand is a vreg, and that vreg is /// being provided by the specified load instruction. If possible, try to /// fold the load as an operand to the instruction, returning true if /// possible. @@ -300,11 +307,11 @@ public: return false; } - /// \brief Reset InsertPt to prepare for inserting instructions into the + /// Reset InsertPt to prepare for inserting instructions into the /// current block. void recomputeInsertPt(); - /// \brief Remove all dead instructions between the I and E. + /// Remove all dead instructions between the I and E. void removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E); @@ -313,11 +320,11 @@ public: DebugLoc DL; }; - /// \brief Prepare InsertPt to begin inserting instructions into the local + /// Prepare InsertPt to begin inserting instructions into the local /// value area and return the old insert position. SavePoint enterLocalValueArea(); - /// \brief Reset InsertPt to the given old insert position. + /// Reset InsertPt to the given old insert position. 
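/// A minimal usage sketch of the local-value area API above (editor's
/// illustration, not part of this patch; the Constant *C is hypothetical):
///   SavePoint SP = enterLocalValueArea();
///   unsigned Reg = fastMaterializeConstant(C); // emit into the LV area
///   leaveLocalValueArea(SP);                   // restore the insert point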
void leaveLocalValueArea(SavePoint Old); protected: @@ -325,45 +332,45 @@ protected: const TargetLibraryInfo *LibInfo, bool SkipTargetIndependentISel = false); - /// \brief This method is called by target-independent code when the normal + /// This method is called by target-independent code when the normal /// FastISel process fails to select an instruction. This gives targets a /// chance to emit code for anything that doesn't fit into FastISel's /// framework. It returns true if it was successful. virtual bool fastSelectInstruction(const Instruction *I) = 0; - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific argument lowering. It returns true if it was successful. virtual bool fastLowerArguments(); - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific call lowering. It returns true if it was successful. virtual bool fastLowerCall(CallLoweringInfo &CLI); - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific intrinsic lowering. It returns true if it was successful. virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type and opcode be emitted. virtual unsigned fastEmit_(MVT VT, MVT RetVT, unsigned Opcode); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register operand be emitted. virtual unsigned fastEmit_r(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register operands be emitted. virtual unsigned fastEmit_rr(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register and immediate /// operands be emitted. virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm); - /// \brief This method is a wrapper of fastEmit_ri. + /// This method is a wrapper of fastEmit_ri. /// /// It first tries to emit an instruction with an immediate operand using /// fastEmit_ri. If that fails, it materializes the immediate into a register @@ -371,89 +378,89 @@ protected: unsigned fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and immediate operand be emitted. 
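// Editor's illustration, not part of this patch: the fastEmit_ri_ wrapper
// described above, with a hypothetical flow for an i32 add:
//   unsigned R = fastEmit_ri_(MVT::i32, ISD::ADD, Op0, Op0IsKill,
//                             /*Imm=*/42, MVT::i32);
//   // Tries fastEmit_ri first; if the immediate cannot be encoded, the
//   // constant is materialized into a register and fastEmit_rr is used.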
virtual unsigned fastEmit_i(MVT VT, MVT RetVT, unsigned Opcode, uint64_t Imm); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and floating-point immediate /// operand be emitted. virtual unsigned fastEmit_f(MVT VT, MVT RetVT, unsigned Opcode, const ConstantFP *FPImm); - /// \brief Emit a MachineInstr with no operands and a result register in the + /// Emit a MachineInstr with no operands and a result register in the /// given register class. unsigned fastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass *RC); - /// \brief Emit a MachineInstr with one register operand and a result register + /// Emit a MachineInstr with one register operand and a result register /// in the given register class. unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - /// \brief Emit a MachineInstr with two register operands and a result + /// Emit a MachineInstr with two register operands and a result /// register in the given register class. unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - /// \brief Emit a MachineInstr with three register operands and a result + /// Emit a MachineInstr with three register operands and a result /// register in the given register class. unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill); - /// \brief Emit a MachineInstr with a register operand, an immediate, and a + /// Emit a MachineInstr with a register operand, an immediate, and a /// result register in the given register class. unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with one register operand and two immediate + /// Emit a MachineInstr with one register operand and two immediate /// operands. unsigned fastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with a floating point immediate, and a result + /// Emit a MachineInstr with a floating point immediate, and a result /// register in the given register class. unsigned fastEmitInst_f(unsigned MachineInstOpcode, const TargetRegisterClass *RC, const ConstantFP *FPImm); - /// \brief Emit a MachineInstr with two register operands, an immediate, and a + /// Emit a MachineInstr with two register operands, an immediate, and a /// result register in the given register class. unsigned fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with a single immediate operand, and a result + /// Emit a MachineInstr with a single immediate operand, and a result /// register in the given register class. - unsigned fastEmitInst_i(unsigned MachineInstrOpcode, + unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); - /// \brief Emit a MachineInstr for an extract_subreg from a specified index of + /// Emit a MachineInstr for an extract_subreg from a specified index of /// a superregister to a specified type. 
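/// For example (editor's sketch, not part of this patch; the sub-register
/// index name is target-specific and illustrative):
///   unsigned Lo = fastEmitInst_extractsubreg(MVT::i32, Op0, Op0IsKill,
///                                            MyTgt::sub_32bit);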
unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); - /// \brief Emit MachineInstrs to compute the value of Op with all but the + /// Emit MachineInstrs to compute the value of Op with all but the /// least significant bit set to zero. unsigned fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill); - /// \brief Emit an unconditional branch to the given block, unless it is the + /// Emit an unconditional branch to the given block, unless it is the /// immediate (fall-through) successor, and update the CFG. - void fastEmitBranch(MachineBasicBlock *MBB, const DebugLoc &DL); + void fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc); /// Emit an unconditional branch to \p FalseMBB, obtains the branch weight /// and adds TrueMBB and FalseMBB to the successor list. void finishCondBranch(const BasicBlock *BranchBB, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); - /// \brief Update the value map to include the new mapping for this + /// Update the value map to include the new mapping for this /// instruction, or insert an extra copy to get the result in a previous /// determined register. /// @@ -464,26 +471,26 @@ protected: unsigned createResultReg(const TargetRegisterClass *RC); - /// \brief Try to constrain Op so that it is usable by argument OpNum of the + /// Try to constrain Op so that it is usable by argument OpNum of the /// provided MCInstrDesc. If this fails, create a new virtual register in the /// correct class and COPY the value there. unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum); - /// \brief Emit a constant in a register using target-specific logic, such as + /// Emit a constant in a register using target-specific logic, such as /// constant pool loads. virtual unsigned fastMaterializeConstant(const Constant *C) { return 0; } - /// \brief Emit an alloca address in a register using target-specific logic. + /// Emit an alloca address in a register using target-specific logic. virtual unsigned fastMaterializeAlloca(const AllocaInst *C) { return 0; } - /// \brief Emit the floating-point constant +0.0 in a register using target- + /// Emit the floating-point constant +0.0 in a register using target- /// specific logic. virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF) { return 0; } - /// \brief Check if \c Add is an add that can be safely folded into \c GEP. + /// Check if \c Add is an add that can be safely folded into \c GEP. /// /// \c Add can be folded into \c GEP if: /// - \c Add is an add, @@ -492,16 +499,16 @@ protected: /// - \c Add has a constant operand. bool canFoldAddIntoGEP(const User *GEP, const Value *Add); - /// \brief Test whether the given value has exactly one use. + /// Test whether the given value has exactly one use. bool hasTrivialKill(const Value *V); - /// \brief Create a machine mem operand from the given instruction. + /// Create a machine mem operand from the given instruction. 
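// Editor's example, not part of this patch, of the foldable pattern that
// canFoldAddIntoGEP (documented above) looks for, in LLVM IR form:
//   %sum = add i64 %base, 16
//   %p = getelementptr i8, i8* %q, i64 %sum
// Here %sum is an add with a constant operand in the same block as the
// GEP, so the 16 can be folded into the GEP's addressing computation.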
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const; CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) const; bool lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs); - bool lowerCallTo(const CallInst *CI, const char *SymbolName, + bool lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs); bool lowerCallTo(CallLoweringInfo &CLI); @@ -518,23 +525,24 @@ protected: } bool lowerCall(const CallInst *I); - /// \brief Select and emit code for a binary operator instruction, which has + /// Select and emit code for a binary operator instruction, which has /// an opcode which directly corresponds to the given ISD opcode. bool selectBinaryOp(const User *I, unsigned ISDOpcode); bool selectFNeg(const User *I); bool selectGetElementPtr(const User *I); bool selectStackmap(const CallInst *I); bool selectPatchpoint(const CallInst *I); - bool selectCall(const User *Call); + bool selectCall(const User *I); bool selectIntrinsicCall(const IntrinsicInst *II); bool selectBitCast(const User *I); bool selectCast(const User *I, unsigned Opcode); - bool selectExtractValue(const User *I); + bool selectExtractValue(const User *U); bool selectInsertValue(const User *I); bool selectXRayCustomEvent(const CallInst *II); + bool selectXRayTypedEvent(const CallInst *II); private: - /// \brief Handle PHI nodes in successor blocks. + /// Handle PHI nodes in successor blocks. /// /// Emit code to ensure constants are copied into registers when needed. /// Remember the virtual registers that need to be added to the Machine PHI /// @@ -543,27 +551,41 @@ private: /// correspond to a different MBB than the end. bool handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); - /// \brief Helper for materializeRegForValue to materialize a constant in a + /// Helper for materializeRegForValue to materialize a constant in a /// target-independent way. unsigned materializeConstant(const Value *V, MVT VT); - /// \brief Helper for getRegForVale. This function is called when the value + /// Helper for getRegForValue. This function is called when the value /// isn't already available in a register and must be materialized with new /// instructions. unsigned materializeRegForValue(const Value *V, MVT VT); - /// \brief Clears LocalValueMap and moves the area for the new local variables + /// Clears LocalValueMap and moves the area for the new local variables /// to the beginning of the block. It helps to avoid spilling cached variables /// across heavy instructions like calls. void flushLocalValueMap(); - /// \brief Removes dead local value instructions after SavedLastLocalvalue. + /// Removes dead local value instructions after SavedLastLocalValue. void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); - /// \brief Insertion point before trying to select the current instruction. + struct InstOrderMap { + DenseMap<MachineInstr *, unsigned> Orders; + MachineInstr *FirstTerminator = nullptr; + unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max(); + + void initialize(MachineBasicBlock *MBB, + MachineBasicBlock::iterator LastFlushPoint); + }; + + /// Sinks the local value materialization instruction LocalMI to its first use + /// in the basic block, or deletes it if it is not used. + void sinkLocalValueMaterialization(MachineInstr &LocalMI, unsigned DefReg, + InstOrderMap &OrderMap); + + /// Insertion point before trying to select the current instruction.
MachineBasicBlock::iterator SavedInsertPt; - /// \brief Add a stackmap or patchpoint intrinsic call's live variable + /// Add a stackmap or patchpoint intrinsic call's live variable /// operands to a stackmap or patchpoint machine instruction. bool addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx); diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 3b39d87ffb4a..2da00b7d61ab 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -118,6 +118,17 @@ public: /// cross-basic-block values. DenseMap<const Value *, unsigned> ValueMap; + /// VirtReg2Value map is needed by the Divergence Analysis driven + /// instruction selection. It is the inverse of ValueMap, and it is + /// computed lazily, on demand. It is used to get the Value corresponding + /// to a live-in virtual register from + /// TargetLowering::isSDNodeSourceOfDivergence. + DenseMap<unsigned, const Value*> VirtReg2Value; + + /// This method is called from TargetLowering::isSDNodeSourceOfDivergence + /// to get the Value corresponding to the live-in virtual register. + const Value * getValueFromVirtualReg(unsigned Vreg); + /// Track virtual registers created for exception pointers. DenseMap<const Value *, unsigned> CatchPadExceptionPointers; @@ -167,6 +178,8 @@ public: /// RegFixups - Registers which need to be replaced after isel is done. DenseMap<unsigned, unsigned> RegFixups; + DenseSet<unsigned> RegsWithFixups; + /// StatepointStackSlots - A list of temporary stack slots (frame indices) /// used to spill values at a statepoint. We store them here to enable /// reuse of the same stack slots across different statepoints in different diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index 16168e785f81..91604fd2df87 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -105,12 +105,12 @@ public: /// By default, write barriers are replaced with simple store /// instructions. If true, you must provide a custom pass to lower - /// calls to @llvm.gcwrite. + /// calls to \@llvm.gcwrite. bool customWriteBarrier() const { return CustomWriteBarriers; } /// By default, read barriers are replaced with simple load /// instructions. If true, you must provide a custom pass to lower - /// calls to @llvm.gcread. + /// calls to \@llvm.gcread. bool customReadBarrier() const { return CustomReadBarriers; } /// Returns true if this strategy is expecting the use of gc.statepoints, @@ -147,7 +147,7 @@ public: /// By default, roots are left for the code generator so it can generate a /// stack map. If true, you must provide a custom pass to lower - /// calls to @llvm.gcroot. + /// calls to \@llvm.gcroot.
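// Editor's sketch, not part of this patch: a strategy opting into the
// custom root lowering described above. MyGCStrategy is hypothetical and
// must be paired with a pass that lowers each @llvm.gcroot call itself.
//   class MyGCStrategy : public GCStrategy {
//   public:
//     MyGCStrategy() { CustomRoots = true; } // customRoots() -> true
//   };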
bool customRoots() const { return CustomRoots; } /// If set, gcroot intrinsics should initialize their allocas to null diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index ba84d76de164..58eb412d8c24 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,11 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <cstdint> #include <functional> @@ -123,7 +123,7 @@ protected: } template <typename FuncInfoTy> - void setArgFlags(ArgInfo &Arg, unsigned OpNum, const DataLayout &DL, + void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; /// Invoke Handler::assignArg on each of the given \p Args and then use /// \p Handler to move them to the appropriate locations. /// /// \return True if everything has succeeded, false otherwise. bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, - ValueHandler &Callback) const; + ValueHandler &Handler) const; public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} diff --git a/include/llvm/CodeGen/GlobalISel/Combiner.h b/include/llvm/CodeGen/GlobalISel/Combiner.h new file mode 100644 index 000000000000..36a33deb4a64 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -0,0 +1,43 @@ +//== ----- llvm/CodeGen/GlobalISel/Combiner.h --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// This contains common code to drive combines. Combiner passes will need to +/// set up a CombinerInfo and call combineMachineInstrs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +class MachineRegisterInfo; +class CombinerInfo; +class TargetPassConfig; +class MachineFunction; + +class Combiner { +public: + Combiner(CombinerInfo &CombinerInfo, const TargetPassConfig *TPC); + + bool combineMachineInstrs(MachineFunction &MF); + +protected: + CombinerInfo &CInfo; + + MachineRegisterInfo *MRI = nullptr; + const TargetPassConfig *TPC; + MachineIRBuilder Builder; +}; + +} // End namespace llvm. + +#endif // LLVM_CODEGEN_GLOBALISEL_COMBINER_H diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h new file mode 100644 index 000000000000..5d5b8398452c --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -0,0 +1,44 @@ +//== llvm/CodeGen/GlobalISel/CombinerHelper.h -------------- -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +/// This contains common combine transformations that may be used in a combine +/// pass, or by the target elsewhere.
+/// Targets can pick individual opcode transformations from the helper or use +/// tryCombine which invokes all transformations. All of the transformations +/// return true if the MachineInstr changed and false otherwise. +// +//===--------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H + +namespace llvm { + +class MachineIRBuilder; +class MachineRegisterInfo; +class MachineInstr; + +class CombinerHelper { + MachineIRBuilder &Builder; + MachineRegisterInfo &MRI; + +public: + CombinerHelper(MachineIRBuilder &B); + + /// If \p MI is a COPY, try to combine it. + /// Returns true if MI changed. + bool tryCombineCopy(MachineInstr &MI); + + /// Try to transform \p MI by using all of the above + /// combine functions. Returns true if changed. + bool tryCombine(MachineInstr &MI); +}; +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h new file mode 100644 index 000000000000..1d248547adbf --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -0,0 +1,48 @@ +//===- llvm/CodeGen/GlobalISel/CombinerInfo.h ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// Interface for targets to specify how and when operations are combined. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_INFO_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_INFO_H + +#include <cassert> +namespace llvm { + +class LegalizerInfo; +class MachineInstr; +class MachineIRBuilder; +class MachineRegisterInfo; +// Contains information relevant to enabling/disabling various combines for a +// pass. +class CombinerInfo { +public: + CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal, + LegalizerInfo *LInfo) + : IllegalOpsAllowed(AllowIllegalOps), + LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) { + assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && + "Expecting LegalizerInfo when illegal ops are not allowed"); + } + virtual ~CombinerInfo() = default; + /// If \p IllegalOpsAllowed is false, the CombinerHelper will make use of + /// the LegalizerInfo to check for legality before each transformation. + bool IllegalOpsAllowed; // TODO: Make use of this. + + /// If \p LegalizeIllegalOps is true, the Combiner will also legalize the + /// illegal ops that are created. + bool LegalizeIllegalOps; // TODO: Make use of this. + const LegalizerInfo *LInfo; + virtual bool combine(MachineInstr &MI, MachineIRBuilder &B) const = 0; +}; +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h new file mode 100644 index 000000000000..8d61f9a68279 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h @@ -0,0 +1,134 @@ +//===-- llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a version of MachineIRBuilder which does trivial +/// constant folding. +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" + +namespace llvm { + +static Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, + const unsigned Op2, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); + if (MaybeOp1Cst && MaybeOp2Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_ADD: + return C1 + C2; + case TargetOpcode::G_AND: + return C1 & C2; + case TargetOpcode::G_ASHR: + return C1.ashr(C2); + case TargetOpcode::G_LSHR: + return C1.lshr(C2); + case TargetOpcode::G_MUL: + return C1 * C2; + case TargetOpcode::G_OR: + return C1 | C2; + case TargetOpcode::G_SHL: + return C1 << C2; + case TargetOpcode::G_SUB: + return C1 - C2; + case TargetOpcode::G_XOR: + return C1 ^ C2; + case TargetOpcode::G_UDIV: + if (!C2.getBoolValue()) + break; + return C1.udiv(C2); + case TargetOpcode::G_SDIV: + if (!C2.getBoolValue()) + break; + return C1.sdiv(C2); + case TargetOpcode::G_UREM: + if (!C2.getBoolValue()) + break; + return C1.urem(C2); + case TargetOpcode::G_SREM: + if (!C2.getBoolValue()) + break; + return C1.srem(C2); + } + } + return None; +} + +/// An MIRBuilder which does trivial constant folding of binary ops. +/// Calls to buildInstr will also try to constant fold binary ops. +class ConstantFoldingMIRBuilder + : public FoldableInstructionsBuilder<ConstantFoldingMIRBuilder> { +public: + // Pull in base class constructors. + using FoldableInstructionsBuilder< + ConstantFoldingMIRBuilder>::FoldableInstructionsBuilder; + // Unhide buildInstr + using FoldableInstructionsBuilder<ConstantFoldingMIRBuilder>::buildInstr; + + // Implement buildBinaryOp required by FoldableInstructionsBuilder which + // tries to constant fold. + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst, + unsigned Src0, unsigned Src1) { + validateBinaryOp(Dst, Src0, Src1); + auto MaybeCst = ConstantFoldBinOp(Opcode, Src0, Src1, getMF().getRegInfo()); + if (MaybeCst) + return buildConstant(Dst, MaybeCst->getSExtValue()); + return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1); + } + + template <typename DstTy, typename UseArg1Ty, typename UseArg2Ty> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArg1Ty &&Arg1, + UseArg2Ty &&Arg2) { + unsigned Dst = getDestFromArg(Ty); + return buildInstr(Opc, Dst, getRegFromArg(std::forward<UseArg1Ty>(Arg1)), + getRegFromArg(std::forward<UseArg2Ty>(Arg2))); + } + + // Try to provide an overload for buildInstr for binary ops in order to + // constant fold. 
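+ //
+ // Editor's sketch, not part of this patch, of the intended effect. If
+ // Reg0 and Reg1 are defined by G_CONSTANT 2 and 3 (builder setup is an
+ // assumption):
+ //   ConstantFoldingMIRBuilder B(MF);
+ //   B.buildInstr(TargetOpcode::G_ADD, Dst, Reg0, Reg1);
+ // emits "Dst = G_CONSTANT 5" instead of a G_ADD.
+ //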
+ MachineInstrBuilder buildInstr(unsigned Opc, unsigned Dst, unsigned Src0, + unsigned Src1) { + switch (Opc) { + default: + break; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_SHL: + case TargetOpcode::G_SUB: + case TargetOpcode::G_XOR: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: { + return buildBinaryOp(Opc, Dst, Src0, Src1); + } + } + return buildInstr(Opc).addDef(Dst).addUse(Src0).addUse(Src1); + } + + // Fallback implementation of buildInstr. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + return MIB; + } +}; +} // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 7061c014d9b7..f3553966fcdf 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Types.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Allocator.h" #include "llvm/IR/Intrinsics.h" #include <memory> #include <utility> @@ -63,9 +64,83 @@ private: /// Interface used to lower everything related to calls. const CallLowering *CLI; - /// Mapping of the values of the current LLVM IR function - /// to the related virtual registers. - ValueToVReg ValToVReg; + /// This class contains the mapping from Values to vreg-related data. + class ValueToVRegInfo { + public: + ValueToVRegInfo() = default; + + using VRegListT = SmallVector<unsigned, 1>; + using OffsetListT = SmallVector<uint64_t, 1>; + + using const_vreg_iterator = + DenseMap<const Value *, VRegListT *>::const_iterator; + using const_offset_iterator = + DenseMap<const Value *, OffsetListT *>::const_iterator; + + inline const_vreg_iterator vregs_end() const { return ValToVRegs.end(); } + + VRegListT *getVRegs(const Value &V) { + auto It = ValToVRegs.find(&V); + if (It != ValToVRegs.end()) + return It->second; + + return insertVRegs(V); + } + + OffsetListT *getOffsets(const Value &V) { + auto It = TypeToOffsets.find(V.getType()); + if (It != TypeToOffsets.end()) + return It->second; + + return insertOffsets(V); + } + + const_vreg_iterator findVRegs(const Value &V) const { + return ValToVRegs.find(&V); + } + + bool contains(const Value &V) const { + return ValToVRegs.find(&V) != ValToVRegs.end(); + } + + void reset() { + ValToVRegs.clear(); + TypeToOffsets.clear(); + VRegAlloc.DestroyAll(); + OffsetAlloc.DestroyAll(); + } + + private: + VRegListT *insertVRegs(const Value &V) { + assert(ValToVRegs.find(&V) == ValToVRegs.end() && "Value already exists"); + + // We placement-new using our fast allocator since we never try to free + // the vectors until translation is finished.
+ auto *VRegList = new (VRegAlloc.Allocate()) VRegListT(); + ValToVRegs[&V] = VRegList; + return VRegList; + } + + OffsetListT *insertOffsets(const Value &V) { + assert(TypeToOffsets.find(V.getType()) == TypeToOffsets.end() && + "Type already exists"); + + auto *OffsetList = new (OffsetAlloc.Allocate()) OffsetListT(); + TypeToOffsets[V.getType()] = OffsetList; + return OffsetList; + } + SpecificBumpPtrAllocator<VRegListT> VRegAlloc; + SpecificBumpPtrAllocator<OffsetListT> OffsetAlloc; + + // We store pointers to vectors here since references may be invalidated + // while we hold them if we stored the vectors directly. + DenseMap<const Value *, VRegListT*> ValToVRegs; + DenseMap<const Type *, OffsetListT*> TypeToOffsets; + }; + + /// Mapping of the values of the current LLVM IR function to the related + /// virtual registers and offsets. + ValueToVRegInfo VMap; // N.b. it's not completely obvious that this will be sufficient for every // LLVM IR construct (with "invoke" being the obvious candidate to mess up our @@ -82,7 +157,8 @@ private: // List of stubbed PHI instructions, for values and basic blocks to be filled // in once all MachineBasicBlocks have been created. - SmallVector<std::pair<const PHINode *, MachineInstr *>, 4> PendingPHIs; + SmallVector<std::pair<const PHINode *, SmallVector<MachineInstr *, 1>>, 4> + PendingPHIs; /// Record of what frame index has been allocated to specified allocas for /// this function. @@ -99,7 +175,7 @@ private: /// The general algorithm is: /// 1. Look for a virtual register for each operand or /// create one. - /// 2 Update the ValToVReg accordingly. + /// 2 Update the VMap accordingly. /// 2.alt. For constant arguments, if they are compile time constants, /// produce an immediate in the right operand and do not touch /// ValToReg. Actually we will go with a virtual register for each @@ -134,7 +210,7 @@ private: /// Translate an LLVM string intrinsic (memcpy, memset, ...). bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned Intrinsic); + unsigned ID); void getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder); @@ -146,6 +222,19 @@ private: bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder); + // FIXME: temporary function to expose previous interface to call lowering + // until it is refactored. + /// Combines all component registers of \p V into a single scalar with size + /// "max(Offsets) + last size". + unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder); + + void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder); + + /// Returns true if the value should be split into multiple LLTs. + /// If \p Offsets is given then the split type's offsets will be stored in it. + bool valueIsSplit(const Value &V, + SmallVectorImpl<uint64_t> *Offsets = nullptr); + /// Translate call instruction. /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); @@ -310,6 +399,9 @@ private: bool translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder); + bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder); + bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder); + // Stubs to keep the compiler happy while we implement the rest of the // translation. 
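// Editor's note, not part of this patch, illustrating packRegs above: a
// value of aggregate type {i32, i64} is flattened to two vregs with bit
// offsets {0, 64}; packRegs combines them into one scalar of 64 + 64 = 128
// bits ("max(Offsets) + last size"). The concrete type is an assumption.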
bool translateResume(const User &U, MachineIRBuilder &MIRBuilder) { @@ -327,14 +419,8 @@ private: bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) { return false; } - bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder) { - return false; - } - bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { - return false; - } bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) { - return false; + return translateCast(TargetOpcode::G_ADDRSPACE_CAST, U, MIRBuilder); } bool translateCleanupPad(const User &U, MachineIRBuilder &MIRBuilder) { return false; @@ -381,9 +467,24 @@ private: // * Clear the different maps. void finalizeFunction(); - /// Get the VReg that represents \p Val. - /// If such VReg does not exist, it is created. - unsigned getOrCreateVReg(const Value &Val); + /// Get the VRegs that represent \p Val. + /// Non-aggregate types have just one corresponding VReg and the list can be + /// used as a single "unsigned". Aggregates get flattened. If such VRegs do + /// not exist, they are created. + ArrayRef<unsigned> getOrCreateVRegs(const Value &Val); + + unsigned getOrCreateVReg(const Value &Val) { + auto Regs = getOrCreateVRegs(Val); + if (Regs.empty()) + return 0; + assert(Regs.size() == 1 && + "attempt to get single VReg for aggregate or void"); + return Regs[0]; + } + + /// Allocate some vregs and offsets in the VMap. Then populate just the + /// offsets while leaving the vregs empty. + ValueToVRegInfo::VRegListT &allocateVRegs(const Value &Val); /// Get the frame index that represents \p Val. /// If such VReg does not exist, it is created. diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 4264a866b6c0..471def7f45a3 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -20,6 +20,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CodeGenCoverage.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include <bitset> #include <cstddef> #include <cstdint> @@ -31,7 +32,6 @@ namespace llvm { class APInt; class APFloat; -class LLT; class MachineInstr; class MachineInstrBuilder; class MachineFunction; @@ -81,6 +81,23 @@ enum { /// failed match. GIM_Try, + /// Switch over the opcode on the specified instruction + /// - InsnID - Instruction ID + /// - LowerBound - numerically minimum opcode supported + /// - UpperBound - numerically maximum + 1 opcode supported + /// - Default - failure jump target + /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets + GIM_SwitchOpcode, + + /// Switch over the LLT on the specified instruction operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - LowerBound - numerically minimum Type ID supported + /// - UpperBound - numerically maximum + 1 Type ID supported + /// - Default - failure jump target + /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets + GIM_SwitchType, + /// Record the specified instruction /// - NewInsnID - Instruction ID to define /// - InsnID - Instruction ID @@ -117,6 +134,23 @@ enum { GIM_CheckAtomicOrdering, GIM_CheckAtomicOrderingOrStrongerThan, GIM_CheckAtomicOrderingWeakerThan, + /// Check the size of the memory access for the given machine memory operand. 
+ /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - Size - The size in bytes of the memory access + GIM_CheckMemorySizeEqualTo, + /// Check the size of the memory access for the given machine memory operand + /// against the size of an operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - OpIdx - The operand index to compare the MMO against + GIM_CheckMemorySizeEqualToLLT, + GIM_CheckMemorySizeLessThanLLT, + GIM_CheckMemorySizeGreaterThanLLT, + /// Check a generic C++ instruction predicate + /// - InsnID - Instruction ID + /// - PredicateID - The ID of the predicate function to call + GIM_CheckCxxInsnPredicate, /// Check the type for the specified operand /// - InsnID - Instruction ID @@ -133,12 +167,14 @@ enum { /// - OpIdx - Operand index /// - Expected register bank (specified as a register class) GIM_CheckRegBankForClass, + /// Check the operand matches a complex predicate /// - InsnID - Instruction ID /// - OpIdx - Operand index /// - RendererID - The renderer to hold the result /// - Complex predicate ID GIM_CheckComplexPattern, + /// Check the operand is a specific integer /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -155,6 +191,7 @@ enum { /// - OpIdx - Operand index /// - Expected Intrinsic ID GIM_CheckIntrinsicID, + /// Check the specified operand is an MBB /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -183,6 +220,7 @@ enum { /// - OldInsnID - Instruction ID to mutate /// - NewOpcode - The new opcode to use GIR_MutateOpcode, + /// Build a new instruction /// - InsnID - Instruction ID to define /// - Opcode - The new opcode to use @@ -193,6 +231,7 @@ enum { /// - OldInsnID - Instruction ID to copy from /// - OpIdx - The operand to copy GIR_Copy, + /// Copy an operand to the specified instruction or add a zero register if the /// operand is a zero immediate. /// - NewInsnID - Instruction ID to modify @@ -206,6 +245,7 @@ enum { /// - OpIdx - The operand to copy /// - SubRegIdx - The subregister to copy GIR_CopySubReg, + /// Add an implicit register def to the specified instruction /// - InsnID - Instruction ID to modify /// - RegNum - The register to add @@ -218,10 +258,13 @@ enum { /// - InsnID - Instruction ID to modify /// - RegNum - The register to add GIR_AddRegister, - /// Add a a temporary register to the specified instruction + + /// Add a temporary register to the specified instruction /// - InsnID - Instruction ID to modify /// - TempRegID - The temporary register ID to add + /// - TempRegFlags - The register flags to set GIR_AddTempRegister, + /// Add an immediate to the specified instruction /// - InsnID - Instruction ID to modify /// - Imm - The immediate to add @@ -230,11 +273,17 @@ enum { /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call GIR_ComplexRenderer, + /// Render sub-operands of complex operands to the specified instruction /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call /// - RenderOpID - The suboperand to render. GIR_ComplexSubOperandRenderer, + /// Render operands to the specified instruction using a custom function + /// - InsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to get the matched operand from + /// - RendererFnID - Custom renderer function to call + GIR_CustomRenderer, /// Render a G_CONSTANT operator as a sign-extended immediate. /// - NewInsnID - Instruction ID to modify @@ -242,24 +291,34 @@ enum { /// The operand index is implicitly 1. 
GIR_CopyConstantAsSImm, + /// Render a G_FCONSTANT operator as a sign-extended immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// The operand index is implicitly 1. + GIR_CopyFConstantAsFPImm, + /// Constrain an instruction operand to a register class. /// - InsnID - Instruction ID to modify /// - OpIdx - Operand index /// - RCEnum - Register class enumeration value GIR_ConstrainOperandRC, + /// Constrain an instructions operands according to the instruction /// description. /// - InsnID - Instruction ID to modify GIR_ConstrainSelectedInstOperands, + /// Merge all memory operands into instruction. /// - InsnID - Instruction ID to modify /// - MergeInsnID... - One or more Instruction ID to merge into the result. /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to /// merge. GIR_MergeMemOperands, + /// Erase from parent. /// - InsnID - Instruction ID to erase GIR_EraseFromParent, + /// Create a new temporary register that's not constrained. /// - TempRegID - The temporary register ID to initialize. /// - Expected type @@ -271,6 +330,9 @@ enum { /// Increment the rule coverage counter. /// - RuleID - The ID of the rule that was covered. GIR_Coverage, + + /// Keeping track of the number of the GI opcodes. Must be the last entry. + GIU_NumOpcodes, }; enum { @@ -311,11 +373,27 @@ protected: }; public: - template <class PredicateBitset, class ComplexMatcherMemFn> - struct MatcherInfoTy { + template <class PredicateBitset, class ComplexMatcherMemFn, + class CustomRendererFn> + struct ISelInfoTy { + ISelInfoTy(const LLT *TypeObjects, size_t NumTypeObjects, + const PredicateBitset *FeatureBitsets, + const ComplexMatcherMemFn *ComplexPredicates, + const CustomRendererFn *CustomRenderers) + : TypeObjects(TypeObjects), + FeatureBitsets(FeatureBitsets), + ComplexPredicates(ComplexPredicates), + CustomRenderers(CustomRenderers) { + + for (size_t I = 0; I < NumTypeObjects; ++I) + TypeIDMap[TypeObjects[I]] = I; + } const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; const ComplexMatcherMemFn *ComplexPredicates; + const CustomRendererFn *CustomRenderers; + + SmallDenseMap<LLT, unsigned, 64> TypeIDMap; }; protected: @@ -324,23 +402,35 @@ protected: /// Execute a given matcher table and return true if the match was successful /// and false otherwise. 
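/// Editor's sketch, not part of this patch: the shape of a tiny table this
/// interpreter could run. The opcodes are real, but the instruction IDs,
/// the on-fail offset, and MYTGT_ADD are made up:
///   static const int64_t MatchTable[] = {
///     GIM_Try, /*OnFail*/ 9,
///       GIM_CheckOpcode, /*MI*/ 0, TargetOpcode::G_ADD,
///       GIR_MutateOpcode, /*MI*/ 0, /*NewOpcode*/ MYTGT_ADD,
///     GIR_Done,
///     GIM_Reject,
///   };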
template <class TgtInstructionSelector, class PredicateBitset, - class ComplexMatcherMemFn> + class ComplexMatcherMemFn, class CustomRendererFn> bool executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, + const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn> + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, CodeGenCoverage &CoverageInfo) const; + virtual const int64_t *getMatchTable() const { + llvm_unreachable("Should have been overridden by tablegen if used"); + } + virtual bool testImmPredicate_I64(unsigned, int64_t) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } virtual bool testImmPredicate_APInt(unsigned, const APInt &) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); + } + virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const { + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } /// Constrain a register operand of an instruction \p I to a specified @@ -353,20 +443,6 @@ protected: const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const; - /// Mutate the newly-selected instruction \p I to constrain its (possibly - /// generic) virtual register operands to the instruction's register class. - /// This could involve inserting COPYs before (for uses) or after (for defs). - /// This requires the number of operands to match the instruction description. - /// \returns whether operand regclass constraining succeeded. - /// - // FIXME: Not all instructions have the same number of operands. We should - // probably expose a constrain helper per operand and let the target selector - // constrain individual registers, like fast-isel. 
- bool constrainSelectedInstRegOperands(MachineInstr &I, - const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; - bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index bf834cf8f5e3..2003a79f6b20 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -40,19 +41,22 @@ enum { GIPFP_I64_Invalid = 0, GIPFP_APInt_Invalid = 0, GIPFP_APFloat_Invalid = 0, + GIPFP_MI_Invalid = 0, }; template <class TgtInstructionSelector, class PredicateBitset, - class ComplexMatcherMemFn> + class ComplexMatcherMemFn, class CustomRendererFn> bool InstructionSelector::executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, + const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn> + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, CodeGenCoverage &CoverageInfo) const { + uint64_t CurrentIdx = 0; - SmallVector<uint64_t, 8> OnFailResumeAt; + SmallVector<uint64_t, 4> OnFailResumeAt; enum RejectAction { RejectAndGiveUp, RejectAndResume }; auto handleReject = [&]() -> RejectAction { @@ -60,8 +64,7 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": Rejected\n"); if (OnFailResumeAt.empty()) return RejectAndGiveUp; - CurrentIdx = OnFailResumeAt.back(); - OnFailResumeAt.pop_back(); + CurrentIdx = OnFailResumeAt.pop_back_val(); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " (" << OnFailResumeAt.size() << " try-blocks remain)\n"); @@ -70,7 +73,8 @@ bool InstructionSelector::executeMatchTable( while (true) { assert(CurrentIdx != ~0u && "Invalid MatchTable index"); - switch (MatchTable[CurrentIdx++]) { + int64_t MatcherOpcode = MatchTable[CurrentIdx++]; + switch (MatcherOpcode) { case GIM_Try: { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Begin try-block\n"); @@ -124,8 +128,8 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID << ")\n"); - if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != - MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { + if ((AvailableFeatures & ISelInfo.FeatureBitsets[ExpectedBitsetID]) != + ISelInfo.FeatureBitsets[ExpectedBitsetID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -136,12 +140,13 @@ bool InstructionSelector::executeMatchTable( int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Expected = MatchTable[CurrentIdx++]; + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); unsigned Opcode = State.MIs[InsnID]->getOpcode(); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID << "], 
ExpectedOpcode=" << Expected << ") // Got=" << Opcode << "\n"); - assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (Opcode != Expected) { if (handleReject() == RejectAndGiveUp) return false; @@ -149,6 +154,77 @@ bool InstructionSelector::executeMatchTable( break; } + case GIM_SwitchOpcode: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t LowerBound = MatchTable[CurrentIdx++]; + int64_t UpperBound = MatchTable[CurrentIdx++]; + int64_t Default = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + const int64_t Opcode = State.MIs[InsnID]->getOpcode(); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), { + dbgs() << CurrentIdx << ": GIM_SwitchOpcode(MIs[" << InsnID << "], [" + << LowerBound << ", " << UpperBound << "), Default=" << Default + << ", JumpTable...) // Got=" << Opcode << "\n"; + }); + if (Opcode < LowerBound || UpperBound <= Opcode) { + CurrentIdx = Default; + break; + } + CurrentIdx = MatchTable[CurrentIdx + (Opcode - LowerBound)]; + if (!CurrentIdx) { + CurrentIdx = Default; + break; + } + OnFailResumeAt.push_back(Default); + break; + } + + case GIM_SwitchType: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t LowerBound = MatchTable[CurrentIdx++]; + int64_t UpperBound = MatchTable[CurrentIdx++]; + int64_t Default = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), { + dbgs() << CurrentIdx << ": GIM_SwitchType(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "), [" << LowerBound << ", " + << UpperBound << "), Default=" << Default + << ", JumpTable...) 
// Got="; + if (!MO.isReg()) + dbgs() << "Not a VReg\n"; + else + dbgs() << MRI.getType(MO.getReg()) << "\n"; + }); + if (!MO.isReg()) { + CurrentIdx = Default; + break; + } + const LLT Ty = MRI.getType(MO.getReg()); + const auto TyI = ISelInfo.TypeIDMap.find(Ty); + if (TyI == ISelInfo.TypeIDMap.end()) { + CurrentIdx = Default; + break; + } + const int64_t TypeID = TyI->second; + if (TypeID < LowerBound || UpperBound <= TypeID) { + CurrentIdx = Default; + break; + } + CurrentIdx = MatchTable[CurrentIdx + (TypeID - LowerBound)]; + if (!CurrentIdx) { + CurrentIdx = Default; + break; + } + OnFailResumeAt.push_back(Default); + break; + } + case GIM_CheckNumOperands: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Expected = MatchTable[CurrentIdx++]; @@ -194,7 +270,8 @@ bool InstructionSelector::executeMatchTable( << CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs[" << InsnID << "], Predicate=" << Predicate << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - assert(State.MIs[InsnID]->getOpcode() && "Expected G_CONSTANT"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT && + "Expected G_CONSTANT"); assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate"); APInt Value; if (State.MIs[InsnID]->getOperand(1).isCImm()) @@ -226,6 +303,21 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckCxxInsnPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckCxxPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate"); + + if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID])) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckAtomicOrdering: { int64_t InsnID = MatchTable[CurrentIdx++]; AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; @@ -233,7 +325,6 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -252,7 +343,6 @@ bool InstructionSelector::executeMatchTable( << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -271,7 +361,6 @@ bool InstructionSelector::executeMatchTable( << ": GIM_CheckAtomicOrderingWeakerThan(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -282,6 +371,87 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemorySizeEqualTo: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + uint64_t Size = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckMemorySizeEqual(MIs[" << InsnID + << "]->memoperands() + " << MMOIdx + << ", Size=" << Size << 
")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << MMO->getSize() << " bytes vs " << Size + << " bytes\n"); + if (MMO->getSize() != Size) + if (handleReject() == RejectAndGiveUp) + return false; + + break; + } + case GIM_CheckMemorySizeEqualToLLT: + case GIM_CheckMemorySizeLessThanLLT: + case GIM_CheckMemorySizeGreaterThanLLT: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemorySize" + << (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT + ? "EqualTo" + : MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT + ? "GreaterThan" + : "LessThan") + << "LLT(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ", OpIdx=" << OpIdx << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg()) { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Not a register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + + unsigned Size = MRI.getType(MO.getReg()).getSizeInBits(); + if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT && + MMO->getSize() * 8 != Size) { + if (handleReject() == RejectAndGiveUp) + return false; + } else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT && + MMO->getSize() * 8 >= Size) { + if (handleReject() == RejectAndGiveUp) + return false; + } else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT && + MMO->getSize() * 8 <= Size) + if (handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckType: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -291,8 +461,9 @@ bool InstructionSelector::executeMatchTable( << "]->getOperand(" << OpIdx << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != - MatcherInfo.TypeObjects[TypeID]) { + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg() || + MRI.getType(MO.getReg()) != ISelInfo.TypeObjects[TypeID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -308,7 +479,6 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), SizeInBits=" << SizeInBits << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - // iPTR must be looked up in the target. 
if (SizeInBits == 0) { MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); @@ -317,11 +487,15 @@ bool InstructionSelector::executeMatchTable( assert(SizeInBits != 0 && "Pointer size must be known"); - const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); - if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { - if (handleReject() == RejectAndGiveUp) - return false; - } + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (MO.isReg()) { + const LLT &Ty = MRI.getType(MO.getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) + if (handleReject() == RejectAndGiveUp) + return false; + } else if (handleReject() == RejectAndGiveUp) + return false; + break; } case GIM_CheckRegBankForClass: { @@ -333,9 +507,10 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != - RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, - TRI)) { + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg() || + &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != + RBI.getRegBank(MO.getReg(), MRI, TRI)) { if (handleReject() == RejectAndGiveUp) return false; } @@ -356,7 +531,7 @@ bool InstructionSelector::executeMatchTable( assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. ComplexRendererFns Renderer = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); if (Renderer.hasValue()) State.Renderers[RendererID] = Renderer.getValue(); @@ -375,16 +550,19 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (MO.isReg()) { + // isOperandImmEqual() will sign-extend to 64-bits, so should we. + LLT Ty = MRI.getType(MO.getReg()); + Value = SignExtend64(Value, Ty.getSizeInBits()); - // isOperandImmEqual() will sign-extend to 64-bits, so should we. - LLT Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); - Value = SignExtend64(Value, Ty.getSizeInBits()); + if (!isOperandImmEqual(MO, Value, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + } else if (handleReject() == RejectAndGiveUp) + return false; - if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, - MRI)) { - if (handleReject() == RejectAndGiveUp) - return false; - } break; } @@ -467,7 +645,7 @@ bool InstructionSelector::executeMatchTable( } case GIM_Reject: DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIM_Reject"); + dbgs() << CurrentIdx << ": GIM_Reject\n"); if (handleReject() == RejectAndGiveUp) return false; break; @@ -649,6 +827,36 @@ bool InstructionSelector::executeMatchTable( break; } + // TODO: Needs a test case once we have a pattern that uses this. 
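      // Purely illustrative, not generated output: a hand-written fragment in
      // the same flat encoding this interpreter consumes. Every entry is an
      // opcode followed by its operands, each read off with
      // MatchTable[CurrentIdx++]; the index and ID values below are
      // placeholders.
      //
      //   static const int64_t ExampleTable[] = {
      //       GIM_CheckNumOperands, /*InsnID*/ 0, /*Expected*/ 3,
      //       GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 0, /*TypeID*/ 0,
      //       GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 1, /*TypeID*/ 0,
      //       // ... GIR_* renderers to build the selected instruction ...
      //       GIR_Done,
      //   };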
+ case GIR_CopyFConstantAsFPImm: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && "Expected G_FCONSTANT"); + if (State.MIs[OldInsnID]->getOperand(1).isFPImm()) + OutMIs[NewInsnID].addFPImm( + State.MIs[OldInsnID]->getOperand(1).getFPImm()); + else + llvm_unreachable("Expected FPImm operand"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyFPConstantAsFPImm(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "])\n"); + break; + } + + case GIR_CustomRenderer: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t RendererFnID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs[" + << InsnID << "], MIs[" << OldInsnID << "], " + << RendererFnID << ")\n"); + (ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID], + *State.MIs[OldInsnID]); + break; + } case GIR_ConstrainOperandRC: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -710,7 +918,7 @@ bool InstructionSelector::executeMatchTable( int64_t TypeID = MatchTable[CurrentIdx++]; State.TempRegisters[TempRegID] = - MRI.createGenericVirtualRegister(MatcherInfo.TypeObjects[TypeID]); + MRI.createGenericVirtualRegister(ISelInfo.TypeObjects[TypeID]); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": TempRegs[" << TempRegID << "] = GIR_MakeTempReg(" << TypeID << ")\n"); @@ -729,7 +937,7 @@ bool InstructionSelector::executeMatchTable( case GIR_Done: DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIR_Done"); + dbgs() << CurrentIdx << ": GIR_Done\n"); return true; default: diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index e7945ff5bf4f..873587651efd 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -38,7 +38,7 @@ public: return false; if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, MI.getOperand(1).getReg(), MRI)) { - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = DefMI->getOperand(1).getReg(); Builder.setInstr(MI); @@ -59,10 +59,10 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_AND, DstTy) || - isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); Builder.setInstr(MI); unsigned ZExtSrc = MI.getOperand(1).getReg(); LLT ZExtSrcTy = MRI.getType(ZExtSrc); @@ -87,11 +87,11 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_SHL, DstTy) || - isInstUnsupported(TargetOpcode::G_ASHR, DstTy) || - isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); Builder.setInstr(MI); unsigned SExtSrc = MI.getOperand(1).getReg(); LLT SExtSrcTy = MRI.getType(SExtSrc); @@ -121,9 +121,9 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_IMPLICIT_DEF, DstTy)) + if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;); Builder.setInstr(MI); Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); markInstAndDefDead(MI, *DefMI, DeadInsts); @@ -139,9 +139,9 @@ public: return false; unsigned NumDefs = MI.getNumOperands() - 1; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr *MergeI = MRI.getVRegDef(SrcReg); - if (!MergeI || (MergeI->getOpcode() != TargetOpcode::G_MERGE_VALUES)) + MachineInstr *MergeI = getOpcodeDef(TargetOpcode::G_MERGE_VALUES, + MI.getOperand(NumDefs).getReg(), MRI); + if (!MergeI) return false; const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; @@ -253,11 +253,8 @@ private: // and as a result, %3, %2, %1 are dead. MachineInstr *PrevMI = &MI; while (PrevMI != &DefMI) { - // If we're dealing with G_UNMERGE_VALUES, tryCombineMerges doesn't really try - // to fold copies in between and we can ignore them here. - if (PrevMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) - break; - unsigned PrevRegSrc = PrevMI->getOperand(1).getReg(); + unsigned PrevRegSrc = + PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg(); MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); if (MRI.hasOneUse(PrevRegSrc)) { if (TmpDef != &DefMI) { @@ -269,18 +266,16 @@ private: break; PrevMI = TmpDef; } - if ((PrevMI == &DefMI || - DefMI.getOpcode() == TargetOpcode::G_MERGE_VALUES) && - MRI.hasOneUse(DefMI.getOperand(0).getReg())) + if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg())) DeadInsts.push_back(&DefMI); } /// Checks if the target legalizer info has specified anything about the /// instruction, or if unsupported. 
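  // Illustrative only: the query-and-check pattern the helper below wraps.
  // Given some destination type DstTy (a placeholder here), a client asks the
  // LegalizerInfo for the next legalization step and inspects the action:
  //
  //   LegalizeActionStep Step = LI.getAction({TargetOpcode::G_AND, {DstTy}});
  //   if (Step.Action == LegalizeActions::Unsupported ||
  //       Step.Action == LegalizeActions::NotFound)
  //     return false; // No usable form of G_AND at this type.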
- bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const { - auto Action = LI.getAction({Opcode, 0, DstTy}); - return Action.first == LegalizerInfo::LegalizeAction::Unsupported || - Action.first == LegalizerInfo::LegalizeAction::NotFound; + bool isInstUnsupported(const LegalityQuery &Query) const { + using namespace LegalizeActions; + auto Step = LI.getAction(Query); + return Step.Action == Unsupported || Step.Action == NotFound; } }; diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 8bd8a9dcd0e2..d122e67b87b8 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -93,12 +93,24 @@ public: const LegalizerInfo &getLegalizerInfo() const { return LI; } private: + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Use by extending the operand's type to \p WideTy using the specified \p + /// ExtOpcode for the extension instruction, and replacing the vreg of the + /// operand in place. + void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, + unsigned ExtOpcode); + + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Def by extending the operand's type to \p WideTy and truncating it back + /// with the \p TruncOpcode, and replacing the vreg of the operand in place. + void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0, + unsigned TruncOpcode = TargetOpcode::G_TRUNC); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic /// registers created are appended to Ops, starting at bit 0 of Reg. void extractParts(unsigned Reg, LLT Ty, int NumParts, - SmallVectorImpl<unsigned> &Ops); + SmallVectorImpl<unsigned> &VRegs); MachineRegisterInfo &MRI; const LegalizerInfo &LI; diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index b6735d538b37..713d72eb4c9b 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -19,8 +19,11 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/LowLevelTypeImpl.h" #include <cassert> #include <cstdint> @@ -30,9 +33,67 @@ namespace llvm { +extern cl::opt<bool> DisableGISelLegalityCheck; + class MachineInstr; class MachineIRBuilder; class MachineRegisterInfo; +class MCInstrInfo; + +namespace LegalizeActions { +enum LegalizeAction : std::uint8_t { + /// The operation is expected to be selectable directly by the target, and + /// no transformation is necessary. + Legal, + + /// The operation should be synthesized from multiple instructions acting on + /// a narrower scalar base-type. For example a 64-bit add might be + /// implemented in terms of 32-bit add-with-carry. + NarrowScalar, + + /// The operation should be implemented in terms of a wider scalar + /// base-type. For example a <2 x s8> add could be implemented as a <2 + /// x s32> add (ignoring the high bits). + WidenScalar, + + /// The (vector) operation should be implemented by splitting it into + /// sub-vectors where the operation is legal. 
For example a <8 x s64> add + /// might be implemented as 4 separate <2 x s64> adds. + FewerElements, + + /// The (vector) operation should be implemented by widening the input + /// vector and ignoring the lanes added by doing so. For example <2 x i8> is + /// rarely legal, but you might perform an <8 x i8> and then only look at + /// the first two results. + MoreElements, + + /// The operation itself must be expressed in terms of simpler actions on + /// this target. E.g. a SREM replaced by an SDIV and subtraction. + Lower, + + /// The operation should be implemented as a call to some kind of runtime + /// support library. For example this usually happens on machines that don't + /// support floating-point operations natively. + Libcall, + + /// The target wants to do something special with this combination of + /// operand and type. A callback will be issued when it is needed. + Custom, + + /// This operation is completely unsupported on the target. A programming + /// error has occurred. + Unsupported, + + /// Sentinel value for when no action was found in the specified table. + NotFound, + + /// Fall back onto the old rules. + /// TODO: Remove this once we've migrated + UseLegacyRules, +}; +} // end namespace LegalizeActions + +using LegalizeActions::LegalizeAction; /// Legalization is decided based on an instruction's opcode, which type slot /// we're considering, and what the existing type is. These aspects are gathered @@ -51,64 +112,642 @@ struct InstrAspect { } }; -class LegalizerInfo { -public: - enum LegalizeAction : std::uint8_t { - /// The operation is expected to be selectable directly by the target, and - /// no transformation is necessary. - Legal, - - /// The operation should be synthesized from multiple instructions acting on - /// a narrower scalar base-type. For example a 64-bit add might be - /// implemented in terms of 32-bit add-with-carry. - NarrowScalar, - - /// The operation should be implemented in terms of a wider scalar - /// base-type. For example a <2 x s8> add could be implemented as a <2 - /// x s32> add (ignoring the high bits). - WidenScalar, - - /// The (vector) operation should be implemented by splitting it into - /// sub-vectors where the operation is legal. For example a <8 x s64> add - /// might be implemented as 4 separate <2 x s64> adds. - FewerElements, - - /// The (vector) operation should be implemented by widening the input - /// vector and ignoring the lanes added by doing so. For example <2 x i8> is - /// rarely legal, but you might perform an <8 x i8> and then only look at - /// the first two results. - MoreElements, - - /// The operation itself must be expressed in terms of simpler actions on - /// this target. E.g. a SREM replaced by an SDIV and subtraction. - Lower, - - /// The operation should be implemented as a call to some kind of runtime - /// support library. For example this usually happens on machines that don't - /// support floating-point operations natively. - Libcall, - - /// The target wants to do something special with this combination of - /// operand and type. A callback will be issued when it is needed. - Custom, - - /// This operation is completely unsupported on the target. A programming - /// error has occurred. - Unsupported, - - /// Sentinel value for when no action was found in the specified table. - NotFound, +/// The LegalityQuery object bundles together all the information that's needed +/// to decide whether a given operation is legal or not. 
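/// (Illustrative example, not from this patch: a query asking whether a
/// 64-bit scalar G_ADD is legal can be spelled
///   LegalityQuery(TargetOpcode::G_ADD, {LLT::scalar(64)})
/// while loads and stores additionally describe each memory operand through
/// MMODescrs.)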
+/// For efficiency, it doesn't make a copy of Types so care must be taken not
+/// to free them before using the query.
+struct LegalityQuery {
+  unsigned Opcode;
+  ArrayRef<LLT> Types;
+
+  struct MemDesc {
+    uint64_t Size;
+    AtomicOrdering Ordering;
+  };
+  /// Operations which require memory can use this to place requirements on the
+  /// memory type for each MMO.
+  ArrayRef<MemDesc> MMODescrs;
+
+  constexpr LegalityQuery(unsigned Opcode, const ArrayRef<LLT> Types,
+                          const ArrayRef<MemDesc> MMODescrs)
+      : Opcode(Opcode), Types(Types), MMODescrs(MMODescrs) {}
+  constexpr LegalityQuery(unsigned Opcode, const ArrayRef<LLT> Types)
+      : LegalityQuery(Opcode, Types, {}) {}
+
+  raw_ostream &print(raw_ostream &OS) const;
+};
+
+/// The result of a query. It either indicates a final answer of Legal or
+/// Unsupported or describes an action that must be taken to make an operation
+/// more legal.
+struct LegalizeActionStep {
+  /// The action to take or the final answer.
+  LegalizeAction Action;
+  /// If describing an action, the type index to change. Otherwise zero.
+  unsigned TypeIdx;
+  /// If describing an action, the new type for TypeIdx. Otherwise LLT{}.
+  LLT NewType;
+
+  LegalizeActionStep(LegalizeAction Action, unsigned TypeIdx,
+                     const LLT &NewType)
+      : Action(Action), TypeIdx(TypeIdx), NewType(NewType) {}
+
+  bool operator==(const LegalizeActionStep &RHS) const {
+    return std::tie(Action, TypeIdx, NewType) ==
+           std::tie(RHS.Action, RHS.TypeIdx, RHS.NewType);
+  }
+};
+
+using LegalityPredicate = std::function<bool (const LegalityQuery &)>;
+using LegalizeMutation =
+    std::function<std::pair<unsigned, LLT>(const LegalityQuery &)>;
+
+namespace LegalityPredicates {
+struct TypePairAndMemSize {
+  LLT Type0;
+  LLT Type1;
+  uint64_t MemSize;
+
+  bool operator==(const TypePairAndMemSize &Other) const {
+    return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+           MemSize == Other.MemSize;
+  }
+};
+
+/// True iff P0 and P1 are true.
+template<typename Predicate>
+Predicate all(Predicate P0, Predicate P1) {
+  return [=](const LegalityQuery &Query) {
+    return P0(Query) && P1(Query);
+  };
+}
+/// True iff all given predicates are true.
+template<typename Predicate, typename... Args>
+Predicate all(Predicate P0, Predicate P1, Args... args) {
+  return all(all(P0, P1), args...);
+}
+/// True iff the given type index is the specified type.
+LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit);
+/// True iff the given type index is one of the specified types.
+LegalityPredicate typeInSet(unsigned TypeIdx,
+                            std::initializer_list<LLT> TypesInit);
+/// True iff the given types for the given pair of type indexes are one of the
+/// specified type pairs.
+LegalityPredicate
+typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
+              std::initializer_list<std::pair<LLT, LLT>> TypesInit);
+/// True iff the given types and memory size for the given pair of type indexes
+/// and MMO index are one of the specified combinations.
+LegalityPredicate typePairAndMemSizeInSet(
+    unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
+    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit);
+/// True iff the specified type index is a scalar.
+LegalityPredicate isScalar(unsigned TypeIdx);
+/// True iff the specified type index is a scalar that's narrower than the
+/// given size.
+LegalityPredicate narrowerThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar that's wider than the given
+/// size.
+LegalityPredicate widerThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar whose size is not a power of
+/// 2.
+LegalityPredicate sizeNotPow2(unsigned TypeIdx);
+/// True iff the specified MMO index has a size that is not a power of 2.
+LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
+/// True iff the specified type index is a vector whose element count is not a
+/// power of 2.
+LegalityPredicate numElementsNotPow2(unsigned TypeIdx);
+/// True iff the specified MMO index has an atomic ordering of Ordering or
+/// stronger.
+LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx,
+                                                      AtomicOrdering Ordering);
+} // end namespace LegalityPredicates
+
+namespace LegalizeMutations {
+/// Select this specific type for the given type index.
+LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty);
+/// Keep the same type as the given type index.
+LegalizeMutation changeTo(unsigned TypeIdx, unsigned FromTypeIdx);
+/// Widen the type for the given type index to the next power of 2.
+LegalizeMutation widenScalarToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Add more elements to the type for the given type index to the next power of
+/// 2.
+LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+} // end namespace LegalizeMutations
+
+/// A single rule in a legalizer info ruleset.
+/// The specified action is chosen when the predicate is true. Where
+/// appropriate for the action (e.g. for WidenScalar) the new type is selected
+/// using the given mutator.
+class LegalizeRule {
+  LegalityPredicate Predicate;
+  LegalizeAction Action;
+  LegalizeMutation Mutation;
+
+public:
+  LegalizeRule(LegalityPredicate Predicate, LegalizeAction Action,
+               LegalizeMutation Mutation = nullptr)
+      : Predicate(Predicate), Action(Action), Mutation(Mutation) {}
+
+  /// Test whether the LegalityQuery matches.
+  bool match(const LegalityQuery &Query) const {
+    return Predicate(Query);
+  }
+
+  LegalizeAction getAction() const { return Action; }
+
+  /// Determine the change to make.
+  std::pair<unsigned, LLT> determineMutation(const LegalityQuery &Query) const {
+    if (Mutation)
+      return Mutation(Query);
+    return std::make_pair(0, LLT{});
+  }
+};
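// An illustrative sketch, not part of this header: how a LegalizeRule built
// from the predicates and mutations above behaves when applied by hand. The
// wrapper function is hypothetical; targets normally construct rules through
// LegalizeRuleSet (below) rather than directly.
inline LegalizeActionStep exampleApplyRule(const LegalityQuery &Query) {
  using namespace LegalityPredicates;
  // "Widen any scalar at type index 0 narrower than 32 bits to the next
  // power of 2 that is at least 32."
  LegalizeRule Rule(narrowerThan(/*TypeIdx=*/0, /*Size=*/32),
                    LegalizeAction::WidenScalar,
                    LegalizeMutations::widenScalarToNextPow2(0, /*Min=*/32));
  if (Rule.match(Query)) {
    // For e.g. {G_ADD, {s16}} the mutation yields {0, s32}.
    auto Mutation = Rule.determineMutation(Query);
    return LegalizeActionStep(Rule.getAction(), Mutation.first,
                              Mutation.second);
  }
  // No match: a real rule set would fall through to its next rule.
  return LegalizeActionStep(LegalizeAction::NotFound, 0, LLT{});
}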
+
+class LegalizeRuleSet {
+  /// When non-zero, the opcode we are an alias of.
+  unsigned AliasOf;
+  /// If true, there is another opcode that aliases this one.
+  bool IsAliasedByAnother;
+  SmallVector<LegalizeRule, 2> Rules;
+
+#ifndef NDEBUG
+  /// If bit I is set, this rule set contains a rule that may handle (predicate
+  /// on, perform an action upon, or both) the type index I. The uncertainty
+  /// comes from free-form rules executing user-provided lambda functions. We
+  /// conservatively assume such rules do the right thing and cover all type
+  /// indices. The bitset is intentionally 1 bit wider than it absolutely needs
+  /// to be to distinguish such cases from the cases where all type indices are
+  /// individually handled.
+  SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC -
+                                 MCOI::OPERAND_FIRST_GENERIC + 2};
+#endif
+
+  unsigned typeIdx(unsigned TypeIdx) {
+    assert(TypeIdx <=
+           (MCOI::OPERAND_LAST_GENERIC - MCOI::OPERAND_FIRST_GENERIC) &&
+           "Type Index is out of bounds");
+#ifndef NDEBUG
+    TypeIdxsCovered.set(TypeIdx);
+#endif
+    return TypeIdx;
+  }
+  void markAllTypeIdxsAsCovered() {
+#ifndef NDEBUG
+    TypeIdxsCovered.set();
+#endif
+  }
+
+  void add(const LegalizeRule &Rule) {
+    assert(AliasOf == 0 &&
+           "RuleSet is aliased, change the representative opcode instead");
+    Rules.push_back(Rule);
+  }
+
+  static bool always(const LegalityQuery &) { return true; }
+
+  /// Use the given action when the predicate is true.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionIf(LegalizeAction Action,
+                            LegalityPredicate Predicate) {
+    add({Predicate, Action});
+    return *this;
+  }
+  /// Use the given action when the predicate is true.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionIf(LegalizeAction Action, LegalityPredicate Predicate,
+                            LegalizeMutation Mutation) {
+    add({Predicate, Action, Mutation});
+    return *this;
+  }
+  /// Use the given action when type index 0 is any type in the given list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<LLT> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typeInSet(typeIdx(0), Types));
+  }
+  /// Use the given action when type index 0 is any type in the given list.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<LLT> Types,
+                             LegalizeMutation Mutation) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typeInSet(typeIdx(0), Types), Mutation);
+  }
+  /// Use the given action when type indexes 0 and 1 form any type pair in the
+  /// given list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<std::pair<LLT, LLT>> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
+  }
+  /// Use the given action when type indexes 0 and 1 form any type pair in the
+  /// given list.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<std::pair<LLT, LLT>> Types,
+                             LegalizeMutation Mutation) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types),
+                    Mutation);
+  }
+  /// Use the given action when type indexes 0 and 1 are both in the given
+  /// list. That is, the type pair is in the cartesian product of the list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionForCartesianProduct(LegalizeAction Action,
+                                             std::initializer_list<LLT> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types),
+                                typeInSet(typeIdx(1), Types)));
+  }
+  /// Use the given action when type indexes 0 and 1 are both in their
+  /// respective lists.
+  /// That is, the type pair is in the cartesian product of the lists.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &
+  actionForCartesianProduct(LegalizeAction Action,
+                            std::initializer_list<LLT> Types0,
+                            std::initializer_list<LLT> Types1) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types0),
+                                typeInSet(typeIdx(1), Types1)));
+  }
+  /// Use the given action when type indexes 0, 1, and 2 are all in their
+  /// respective lists.
+  /// That is, the type triple is in the cartesian product of the lists.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionForCartesianProduct(
+      LegalizeAction Action, std::initializer_list<LLT> Types0,
+      std::initializer_list<LLT> Types1, std::initializer_list<LLT> Types2) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types0),
+                                all(typeInSet(typeIdx(1), Types1),
+                                    typeInSet(typeIdx(2), Types2))));
+  }
+
+public:
+  LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false), Rules() {}
+
+  bool isAliasedByAnother() { return IsAliasedByAnother; }
+  void setIsAliasedByAnother() { IsAliasedByAnother = true; }
+  void aliasTo(unsigned Opcode) {
+    assert((AliasOf == 0 || AliasOf == Opcode) &&
+           "Opcode is already aliased to another opcode");
+    assert(Rules.empty() && "Aliasing will discard rules");
+    AliasOf = Opcode;
+  }
+  unsigned getAlias() const { return AliasOf; }
+
+  /// The instruction is legal if predicate is true.
+  LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
+    // We have no choice but conservatively assume that the free-form
+    // user-provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Legal, Predicate);
+  }
+  /// The instruction is legal when type index 0 is any type in the given list.
+  LegalizeRuleSet &legalFor(std::initializer_list<LLT> Types) {
+    return actionFor(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 form any type pair in
+  /// the given list.
+  LegalizeRuleSet &legalFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+    return actionFor(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 along with the memory
+  /// size form any type-and-size tuple in the given list.
+  LegalizeRuleSet &legalForTypesWithMemSize(
+      std::initializer_list<LegalityPredicates::TypePairAndMemSize>
+          TypesAndMemSize) {
+    return actionIf(LegalizeAction::Legal,
+                    LegalityPredicates::typePairAndMemSizeInSet(
+                        typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemSize));
+  }
+  /// The instruction is legal when type indexes 0 and 1 are both in the given
+  /// list. That is, the type pair is in the cartesian product of the list.
+  LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types) {
+    return actionForCartesianProduct(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 are both in their
+  /// respective lists.
+  LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1) {
+    return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1);
+  }
+
+  /// The instruction is lowered.
+  LegalizeRuleSet &lower() {
+    using namespace LegalizeMutations;
+    // We have no choice but conservatively assume that predicate-less lowering
+    // properly handles all type indices by design:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, always);
+  }
+  /// The instruction is lowered if predicate is true. Keep type index 0 as the
+  /// same type.
+  LegalizeRuleSet &lowerIf(LegalityPredicate Predicate) {
+    using namespace LegalizeMutations;
+    // We have no choice but conservatively assume that lowering with a
+    // free-form user provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, Predicate);
+  }
+  /// The instruction is lowered if predicate is true.
+  LegalizeRuleSet &lowerIf(LegalityPredicate Predicate,
+                           LegalizeMutation Mutation) {
+    // We have no choice but conservatively assume that lowering with a
+    // free-form user provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, Predicate, Mutation);
+  }
+  /// The instruction is lowered when type index 0 is any type in the given
+  /// list. Keep type index 0 as the same type.
+  LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types) {
+    return actionFor(LegalizeAction::Lower, Types,
+                     LegalizeMutations::changeTo(0, 0));
+  }
+  /// The instruction is lowered when type index 0 is any type in the given
+  /// list.
+  LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types,
+                            LegalizeMutation Mutation) {
+    return actionFor(LegalizeAction::Lower, Types, Mutation);
+  }
+  /// The instruction is lowered when type indexes 0 and 1 form any type pair
+  /// in the given list. Keep type index 0 as the same type.
+  LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+    return actionFor(LegalizeAction::Lower, Types,
+                     LegalizeMutations::changeTo(0, 0));
+  }
+  /// The instruction is lowered when type indexes 0 and 1 form any type pair
+  /// in the given list.
+  LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types,
+                            LegalizeMutation Mutation) {
+    return actionFor(LegalizeAction::Lower, Types, Mutation);
+  }
+  /// The instruction is lowered when type indexes 0 and 1 are both in their
+  /// respective lists.
+  LegalizeRuleSet &lowerForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1) {
+    using namespace LegalityPredicates;
+    return actionForCartesianProduct(LegalizeAction::Lower, Types0, Types1);
+  }
+  /// The instruction is lowered when type indexes 0, 1, and 2 are all in
+  /// their respective lists.
+  LegalizeRuleSet &lowerForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1,
+                                            std::initializer_list<LLT> Types2) {
+    using namespace LegalityPredicates;
+    return actionForCartesianProduct(LegalizeAction::Lower, Types0, Types1,
+                                     Types2);
+  }
+
+  /// Like legalIf, but for the Libcall action.
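  // For instance (hypothetical soft-float target, not from this patch), a
  // target could route FP remainder to the runtime via the Libcall action:
  //
  //   getActionDefinitionsBuilder(TargetOpcode::G_FREM)
  //       .libcallFor({LLT::scalar(32), LLT::scalar(64)});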
+ LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { + // We have no choice but conservatively assume that a libcall with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::Libcall, Predicate); + } + LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) { + return actionFor(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallFor(std::initializer_list<std::pair<LLT, LLT>> Types) { + return actionFor(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallForCartesianProduct(std::initializer_list<LLT> Types) { + return actionForCartesianProduct(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallForCartesianProduct(std::initializer_list<LLT> Types0, + std::initializer_list<LLT> Types1) { + return actionForCartesianProduct(LegalizeAction::Libcall, Types0, Types1); + } + + /// Widen the scalar to the one selected by the mutation if the predicate is + /// true. + LegalizeRuleSet &widenScalarIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation); + } + /// Narrow the scalar to the one selected by the mutation if the predicate is + /// true. + LegalizeRuleSet &narrowScalarIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); + } + + /// Add more elements to reach the type selected by the mutation if the + /// predicate is true. + LegalizeRuleSet &moreElementsIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::MoreElements, Predicate, Mutation); + } + /// Remove elements to reach the type selected by the mutation if the + /// predicate is true. + LegalizeRuleSet &fewerElementsIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::FewerElements, Predicate, Mutation); + } + + /// The instruction is unsupported. 
+ LegalizeRuleSet &unsupported() { + return actionIf(LegalizeAction::Unsupported, always); + } + LegalizeRuleSet &unsupportedIf(LegalityPredicate Predicate) { + return actionIf(LegalizeAction::Unsupported, Predicate); + } + LegalizeRuleSet &unsupportedIfMemSizeNotPow2() { + return actionIf(LegalizeAction::Unsupported, + LegalityPredicates::memSizeInBytesNotPow2(0)); + } + + LegalizeRuleSet &customIf(LegalityPredicate Predicate) { + // We have no choice but conservatively assume that a custom action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::Custom, Predicate); + } + LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) { + return actionFor(LegalizeAction::Custom, Types); + } + LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) { + return actionForCartesianProduct(LegalizeAction::Custom, Types); + } + LegalizeRuleSet & + customForCartesianProduct(std::initializer_list<LLT> Types0, + std::initializer_list<LLT> Types1) { + return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1); + } + + /// Widen the scalar to the next power of two that is at least MinSize. + /// No effect if the type is not a scalar or is a power of two. + LegalizeRuleSet &widenScalarToNextPow2(unsigned TypeIdx, + unsigned MinSize = 0) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)), + LegalizeMutations::widenScalarToNextPow2(TypeIdx, MinSize)); + } + + LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)), + Mutation); + } + + /// Ensure the scalar is at least as wide as Ty. + LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::WidenScalar, + narrowerThan(TypeIdx, Ty.getSizeInBits()), + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Ensure the scalar is at most as wide as Ty. + LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::NarrowScalar, + widerThan(TypeIdx, Ty.getSizeInBits()), + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Conditionally limit the maximum size of the scalar. + /// For example, when the maximum size of one type depends on the size of + /// another such as extracting N bits from an M bit container. + LegalizeRuleSet &maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, + const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::NarrowScalar, + [=](const LegalityQuery &Query) { + return widerThan(TypeIdx, Ty.getSizeInBits()) && + Predicate(Query); + }, + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Limit the range of scalar sizes to MinTy and MaxTy. + LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT &MinTy, + const LLT &MaxTy) { + assert(MinTy.isScalar() && MaxTy.isScalar() && "Expected scalar types"); + return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy); + } + + /// Add more elements to the vector to reach the next power of two. + /// No effect if the type is not a vector or the element count is a power of + /// two. 
+ LegalizeRuleSet &moreElementsToNextPow2(unsigned TypeIdx) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::MoreElements, + numElementsNotPow2(typeIdx(TypeIdx)), + LegalizeMutations::moreElementsToNextPow2(TypeIdx)); + } + + /// Limit the number of elements in EltTy vectors to at least MinElements. + LegalizeRuleSet &clampMinNumElements(unsigned TypeIdx, const LLT &EltTy, + unsigned MinElements) { + // Mark the type index as covered: + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::MoreElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + VecTy.getNumElements() < MinElements; + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return std::make_pair( + TypeIdx, LLT::vector(MinElements, VecTy.getScalarSizeInBits())); + }); + } + /// Limit the number of elements in EltTy vectors to at most MaxElements. + LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT &EltTy, + unsigned MaxElements) { + // Mark the type index as covered: + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::FewerElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + VecTy.getNumElements() > MaxElements; + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return std::make_pair( + TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits())); + }); + } + /// Limit the number of elements for the given vectors to at least MinTy's + /// number of elements and at most MaxTy's number of elements. + /// + /// No effect if the type is not a vector or does not have the same element + /// type as the constraints. + /// The element type of MinTy and MaxTy must match. + LegalizeRuleSet &clampNumElements(unsigned TypeIdx, const LLT &MinTy, + const LLT &MaxTy) { + assert(MinTy.getElementType() == MaxTy.getElementType() && + "Expected element types to agree"); + + const LLT &EltTy = MinTy.getElementType(); + return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements()) + .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); + } + + /// Fallback on the previous implementation. This should only be used while + /// porting a rule. + LegalizeRuleSet &fallback() { + add({always, LegalizeAction::UseLegacyRules}); + return *this; + } + + /// Check if there is no type index which is obviously not handled by the + /// LegalizeRuleSet in any way at all. + /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. + bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const; + + /// Apply the ruleset to the given LegalityQuery. + LegalizeActionStep apply(const LegalityQuery &Query) const; +}; + +class LegalizerInfo { +public: LegalizerInfo(); virtual ~LegalizerInfo() = default; + unsigned getOpcodeIdxForOpcode(unsigned Opcode) const; + unsigned getActionDefinitionsIdx(unsigned Opcode) const; + /// Compute any ancillary tables needed to quickly decide how an operation /// should be handled. This must be called after all "set*Action"methods but /// before any query is made or incorrect results may be returned. void computeTables(); + /// Perform simple self-diagnostic and assert if there is anything obviously + /// wrong with the actions set up. 
+ void verify(const MCInstrInfo &MII) const; + static bool needsLegalizingToDifferentSize(const LegalizeAction Action) { + using namespace LegalizeActions; switch (Action) { case NarrowScalar: case WidenScalar: @@ -121,8 +760,8 @@ public: } } - typedef std::pair<uint16_t, LegalizeAction> SizeAndAction; - typedef std::vector<SizeAndAction> SizeAndActionsVec; + using SizeAndAction = std::pair<uint16_t, LegalizeAction>; + using SizeAndActionsVec = std::vector<SizeAndAction>; using SizeChangeStrategy = std::function<SizeAndActionsVec(const SizeAndActionsVec &v)>; @@ -186,8 +825,9 @@ public: /// and Unsupported for all other scalar types T. static SizeAndActionsVec unsupportedForDifferentSizes(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, Unsupported, - Unsupported); + Unsupported); } /// A SizeChangeStrategy for the common case where legalization for a @@ -196,32 +836,36 @@ public: /// largest legal type. static SizeAndActionsVec widenToLargerTypesAndNarrowToLargest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; assert(v.size() > 0 && "At least one size that can be legalized towards is needed" " for this SizeChangeStrategy"); return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, - NarrowScalar); + NarrowScalar); } static SizeAndActionsVec widenToLargerTypesUnsupportedOtherwise(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, - Unsupported); + Unsupported); } static SizeAndActionsVec narrowToSmallerAndUnsupportedIfTooSmall(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, - Unsupported); + Unsupported); } static SizeAndActionsVec narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; assert(v.size() > 0 && "At least one size that can be legalized towards is needed" " for this SizeChangeStrategy"); return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, - WidenScalar); + WidenScalar); } /// A SizeChangeStrategy for the common case where legalization for a @@ -244,8 +888,9 @@ public: /// (FewerElements, vector(4,32)). static SizeAndActionsVec moreToWiderTypesAndLessToWidest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, MoreElements, - FewerElements); + FewerElements); } /// Helper function to implement many typical SizeChangeStrategy functions. @@ -259,22 +904,46 @@ public: LegalizeAction DecreaseAction, LegalizeAction IncreaseAction); - /// Determine what action should be taken to legalize the given generic - /// instruction opcode, type-index and type. Requires computeTables to have - /// been called. + /// Get the action definitions for the given opcode. Use this to run a + /// LegalityQuery through the definitions. + const LegalizeRuleSet &getActionDefinitions(unsigned Opcode) const; + + /// Get the action definition builder for the given opcode. Use this to define + /// the action definitions. /// - /// \returns a pair consisting of the kind of legalization that should be - /// performed and the destination type. - std::pair<LegalizeAction, LLT> getAction(const InstrAspect &Aspect) const; + /// It is an error to request an opcode that has already been requested by the + /// multiple-opcode variant. 
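  // A sketch of intended use (hypothetical target; names assumed, not part of
  // this patch): a target's LegalizerInfo constructor requests a builder for
  // one or more opcodes, chains rules on it, then finalizes the tables:
  //
  //   MyTargetLegalizerInfo::MyTargetLegalizerInfo() {
  //     const LLT S32 = LLT::scalar(32);
  //     const LLT S64 = LLT::scalar(64);
  //     getActionDefinitionsBuilder({TargetOpcode::G_ADD, TargetOpcode::G_SUB})
  //         .legalFor({S32, S64})
  //         .clampScalar(0, S32, S64)
  //         .widenScalarToNextPow2(0);
  //     computeTables();
  //   }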
+ LegalizeRuleSet &getActionDefinitionsBuilder(unsigned Opcode); + + /// Get the action definition builder for the given set of opcodes. Use this + /// to define the action definitions for multiple opcodes at once. The first + /// opcode given will be considered the representative opcode and will hold + /// the definitions whereas the other opcodes will be configured to refer to + /// the representative opcode. This lowers memory requirements and very + /// slightly improves performance. + /// + /// It would be very easy to introduce unexpected side-effects as a result of + /// this aliasing if it were permitted to request different but intersecting + /// sets of opcodes but that is difficult to keep track of. It is therefore an + /// error to request the same opcode twice using this API, to request an + /// opcode that already has definitions, or to use the single-opcode API on an + /// opcode that has already been requested by this API. + LegalizeRuleSet & + getActionDefinitionsBuilder(std::initializer_list<unsigned> Opcodes); + void aliasActionDefinitions(unsigned OpcodeTo, unsigned OpcodeFrom); + + /// Determine what action should be taken to legalize the described + /// instruction. Requires computeTables to have been called. + /// + /// \returns a description of the next legalization step to perform. + LegalizeActionStep getAction(const LegalityQuery &Query) const; /// Determine what action should be taken to legalize the given generic /// instruction. /// - /// \returns a tuple consisting of the LegalizeAction that should be - /// performed, the type-index it should be performed on and the destination - /// type. - std::tuple<LegalizeAction, unsigned, LLT> - getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; + /// \returns a description of the next legalization step to perform. + LegalizeActionStep getAction(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const; bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; @@ -283,6 +952,15 @@ public: MachineIRBuilder &MIRBuilder) const; private: + /// Determine what action should be taken to legalize the given generic + /// instruction opcode, type-index and type. Requires computeTables to have + /// been called. + /// + /// \returns a pair consisting of the kind of legalization that should be + /// performed and the destination type. + std::pair<LegalizeAction, LLT> + getAspectAction(const InstrAspect &Aspect) const; + /// The SizeAndActionsVec is a representation mapping between all natural /// numbers and an Action. The natural number represents the bit size of /// the InstrAspect. For example, for a target with native support for 32-bit @@ -350,6 +1028,7 @@ private: /// A partial SizeAndActionsVec potentially doesn't cover all bit sizes, /// i.e. it's OK if it doesn't start from size 1. 
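  // For illustration (assumed values): a SizeAndActionsVec for a target with
  // native 32- and 64-bit operations could look like
  //
  //   {{1, WidenScalar},   // bit sizes [ 1, 31] -> widen
  //    {32, Legal},        // bit size 32        -> legal
  //    {33, WidenScalar},  // bit sizes [33, 63] -> widen
  //    {64, Legal},        // bit size 64        -> legal
  //    {65, NarrowScalar}} // bit sizes [65, ..] -> narrow
  //
  // where each entry supplies the action for every size from its own size up
  // to (but not including) the next entry's size.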
static void checkPartialSizeAndActionsVector(const SizeAndActionsVec& v) { + using namespace LegalizeActions; #ifndef NDEBUG // The sizes should be in increasing order int prev_size = -1; @@ -441,7 +1120,7 @@ private: static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; // Data structures used temporarily during construction of legality data: - typedef DenseMap<LLT, LegalizeAction> TypeMap; + using TypeMap = DenseMap<LLT, LegalizeAction>; SmallVector<TypeMap, 1> SpecifiedActions[LastOp - FirstOp + 1]; SmallVector<SizeChangeStrategy, 1> ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; @@ -456,8 +1135,16 @@ private: AddrSpace2PointerActions[LastOp - FirstOp + 1]; std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> NumElements2Actions[LastOp - FirstOp + 1]; + + LegalizeRuleSet RulesForOpcode[LastOp - FirstOp + 1]; }; +#ifndef NDEBUG +/// Checks that MIR is fully legal, returns an illegal instruction if it's not, +/// nullptr otherwise +const MachineInstr *machineFunctionIsIllegal(const MachineFunction &MF); +#endif + } // end namespace llvm. #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H diff --git a/include/llvm/CodeGen/GlobalISel/Localizer.h b/include/llvm/CodeGen/GlobalISel/Localizer.h index 0a46eb9e7840..1e2d4763e5e1 100644 --- a/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -70,6 +70,8 @@ public: .set(MachineFunctionProperties::Property::RegBankSelected); } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; }; diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h new file mode 100644 index 000000000000..f77f9a8df7ee --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -0,0 +1,338 @@ +//== ----- llvm/CodeGen/GlobalISel/MIPatternMatch.h --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// Contains matchers for matching SSA Machine Instructions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_GMIR_PATTERNMATCH_H +#define LLVM_GMIR_PATTERNMATCH_H + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { +namespace MIPatternMatch { + +template <typename Reg, typename Pattern> +bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) { + return P.match(MRI, R); +} + +// TODO: Extend for N use. +template <typename SubPatternT> struct OneUse_match { + SubPatternT SubPat; + OneUse_match(const SubPatternT &SP) : SubPat(SP) {} + + template <typename OpTy> + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { + return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg); + } +}; + +template <typename SubPat> +inline OneUse_match<SubPat> m_OneUse(const SubPat &SP) { + return SP; +} + +struct ConstantMatch { + int64_t &CR; + ConstantMatch(int64_t &C) : CR(C) {} + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { + if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) { + CR = *MaybeCst; + return true; + } + return false; + } +}; + +inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } + +// TODO: Rework this for different kinds of MachineOperand. 
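// An illustrative sketch (register names hypothetical): with the pieces
// defined so far, a client can already match "Reg is defined by a constant"
// and capture the value:
//
//   int64_t Cst;
//   if (mi_match(Reg, MRI, m_ICst(Cst))) {
//     // Reg is defined by a G_CONSTANT; Cst now holds its value.
//   }
//
// The operand and instruction matchers defined below compose the same way,
// e.g. mi_match(Dst, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst))).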
+// Currently assumes the Src for a match is a register. +// We might want to support taking in some MachineOperands and call getReg on +// that. + +struct operand_type_match { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return true; } + bool match(const MachineRegisterInfo &MRI, MachineOperand *MO) { + return MO->isReg(); + } +}; + +inline operand_type_match m_Reg() { return operand_type_match(); } + +/// Matching combinators. +template <typename... Preds> struct And { + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return true; + } +}; + +template <typename Pred, typename... Preds> +struct And<Pred, Preds...> : And<Preds...> { + Pred P; + And(Pred &&p, Preds &&... preds) + : And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) { + } + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return P.match(MRI, src) && And<Preds...>::match(MRI, src); + } +}; + +template <typename... Preds> struct Or { + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return false; + } +}; + +template <typename Pred, typename... Preds> +struct Or<Pred, Preds...> : Or<Preds...> { + Pred P; + Or(Pred &&p, Preds &&... preds) + : Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {} + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return P.match(MRI, src) || Or<Preds...>::match(MRI, src); + } +}; + +template <typename... Preds> And<Preds...> m_all_of(Preds &&... preds) { + return And<Preds...>(std::forward<Preds>(preds)...); +} + +template <typename... Preds> Or<Preds...> m_any_of(Preds &&... preds) { + return Or<Preds...>(std::forward<Preds>(preds)...); +} + +template <typename BindTy> struct bind_helper { + static bool bind(const MachineRegisterInfo &MRI, BindTy &VR, BindTy &V) { + VR = V; + return true; + } +}; + +template <> struct bind_helper<MachineInstr *> { + static bool bind(const MachineRegisterInfo &MRI, MachineInstr *&MI, + unsigned Reg) { + MI = MRI.getVRegDef(Reg); + if (MI) + return true; + return false; + } +}; + +template <> struct bind_helper<LLT> { + static bool bind(const MachineRegisterInfo &MRI, LLT &Ty, unsigned Reg) { + Ty = MRI.getType(Reg); + if (Ty.isValid()) + return true; + return false; + } +}; + +template <> struct bind_helper<const ConstantFP *> { + static bool bind(const MachineRegisterInfo &MRI, const ConstantFP *&F, + unsigned Reg) { + F = getConstantFPVRegVal(Reg, MRI); + if (F) + return true; + return false; + } +}; + +template <typename Class> struct bind_ty { + Class &VR; + + bind_ty(Class &V) : VR(V) {} + + template <typename ITy> bool match(const MachineRegisterInfo &MRI, ITy &&V) { + return bind_helper<Class>::bind(MRI, VR, V); + } +}; + +inline bind_ty<unsigned> m_Reg(unsigned &R) { return R; } +inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; } +inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; } + +// Helper for matching G_FCONSTANT +inline bind_ty<const ConstantFP *> m_GFCst(const ConstantFP *&C) { return C; } + +// General helper for all the binary generic MI such as G_ADD/G_SUB etc +template <typename LHS_P, typename RHS_P, unsigned Opcode, + bool Commutable = false> +struct BinaryOp_match { + LHS_P L; + RHS_P R; + + BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {} + template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + MachineInstr *TmpMI; + if (mi_match(Op, MRI, m_MInstr(TmpMI))) { + if 
(TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { + return (L.match(MRI, TmpMI->getOperand(1).getReg()) && + R.match(MRI, TmpMI->getOperand(2).getReg())) || + (Commutable && (R.match(MRI, TmpMI->getOperand(1).getReg()) && + L.match(MRI, TmpMI->getOperand(2).getReg()))); + } + } + return false; + } +}; + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true> +m_GAdd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB> m_GSub(const LHS &L, + const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_MUL, true> +m_GMul(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_MUL, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FADD, true> +m_GFAdd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FADD, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FMUL, true> +m_GFMul(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FMUL, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FSUB, false> +m_GFSub(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FSUB, false>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_AND, true> +m_GAnd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_AND, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L, + const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true>(L, R); +} + +// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc +template <typename SrcTy, unsigned Opcode> struct UnaryOp_match { + SrcTy L; + + UnaryOp_match(const SrcTy &LHS) : L(LHS) {} + template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + MachineInstr *TmpMI; + if (mi_match(Op, MRI, m_MInstr(TmpMI))) { + if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) { + return L.match(MRI, TmpMI->getOperand(1).getReg()); + } + } + return false; + } +}; + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_ANYEXT> +m_GAnyExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_ANYEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_SEXT> m_GSExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_SEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_ZEXT> m_GZExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_ZEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FPEXT> m_GFPExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FPEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_TRUNC> m_GTrunc(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_TRUNC>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_BITCAST> +m_GBitcast(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_BITCAST>(Src); +} + +template <typename SrcTy> 
+inline UnaryOp_match<SrcTy, TargetOpcode::G_PTRTOINT> +m_GPtrToInt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_PTRTOINT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_INTTOPTR> +m_GIntToPtr(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_INTTOPTR>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FPTRUNC> +m_GFPTrunc(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FPTRUNC>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FABS> m_GFabs(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FABS>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FNEG> m_GFNeg(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FNEG>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) { + return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src)); +} + +// Helper for checking if a Reg is of a specific type. +struct CheckType { + LLT Ty; + CheckType(const LLT &Ty) : Ty(Ty) {} + + bool match(MachineRegisterInfo &MRI, unsigned Reg) { + return MRI.getType(Reg) == Ty; + } +}; + +inline CheckType m_SpecificType(LLT Ty) { return Ty; } + +} // namespace MIPatternMatch +} // namespace llvm + +#endif
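A usage sketch for these matchers (editorial illustration, not part of the patch; the helper name matchAddOfConstant, the register AddReg, and the surrounding combine context are assumed):

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
using namespace llvm;
using namespace MIPatternMatch;

// True iff AddReg is defined by a G_ADD with a G_CONSTANT operand; binds
// SrcReg to the other operand and CstVal to the constant's value. m_GAdd is
// declared commutative, so the constant may appear on either side.
static bool matchAddOfConstant(unsigned AddReg, MachineRegisterInfo &MRI,
                               unsigned &SrcReg, int64_t &CstVal) {
  return mi_match(AddReg, MRI, m_GAdd(m_Reg(SrcReg), m_ICst(CstVal)));
}
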
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index aa875c11d86f..983a4e680d5c 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -23,7 +23,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" -#include <queue> namespace llvm { @@ -32,11 +31,10 @@ class MachineFunction; class MachineInstr; class TargetInstrInfo; -/// Helper class to build MachineInstr. -/// It keeps internally the insertion point and debug location for all -/// the new instructions we want to create. -/// This information can be modify via the related setters. -class MachineIRBuilder { +/// Class which stores all the state required in a MachineIRBuilder. +/// Since MachineIRBuilders will only store state in this object, it allows +/// the BuilderState to be transferred between different kinds of +/// MachineIRBuilders. +struct MachineIRBuilderState { /// MachineFunction under construction. MachineFunction *MF; /// Information used to access the description of the opcodes. @@ -53,15 +51,23 @@ /// @} std::function<void(MachineInstr *)> InsertedInstr; +}; + +/// Helper class to build MachineInstr. +/// It keeps internally the insertion point and debug location for all +/// the new instructions we want to create. +/// This information can be modified via the related setters. +class MachineIRBuilderBase { + MachineIRBuilderState State; const TargetInstrInfo &getTII() { - assert(TII && "TargetInstrInfo is not set"); - return *TII; + assert(State.TII && "TargetInstrInfo is not set"); + return *State.TII; } void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend); - MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1); +protected: unsigned getDestFromArg(unsigned Reg) { return Reg; } unsigned getDestFromArg(LLT Ty) { return getMF().getRegInfo().createGenericVirtualRegister(Ty); @@ -89,30 +95,41 @@ return MIB->getOperand(0).getReg(); } + void validateBinaryOp(unsigned Res, unsigned Op0, unsigned Op1); + public: /// Some constructors for easy use. - MachineIRBuilder() = default; - MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + MachineIRBuilderBase() = default; + MachineIRBuilderBase(MachineFunction &MF) { setMF(MF); } + MachineIRBuilderBase(MachineInstr &MI) : MachineIRBuilderBase(*MI.getMF()) { setInstr(MI); } + MachineIRBuilderBase(const MachineIRBuilderState &BState) : State(BState) {} + /// Getter for the function we currently build. MachineFunction &getMF() { - assert(MF && "MachineFunction is not set"); - return *MF; + assert(State.MF && "MachineFunction is not set"); + return *State.MF; } + /// Getter for DebugLoc. + const DebugLoc &getDL() { return State.DL; } + + /// Getter for MRI. + MachineRegisterInfo *getMRI() { return State.MRI; } + + /// Getter for the State. + MachineIRBuilderState &getState() { return State; } + /// Getter for the basic block we currently build. MachineBasicBlock &getMBB() { - assert(MBB && "MachineBasicBlock is not set"); - return *MBB; + assert(State.MBB && "MachineBasicBlock is not set"); + return *State.MBB; } /// Current insertion point for new instructions. - MachineBasicBlock::iterator getInsertPt() { - return II; - } + MachineBasicBlock::iterator getInsertPt() { return State.II; } /// Set the insertion point before the specified position. /// \pre MBB must be in getMF(). @@ -137,15 +154,16 @@ public: /// \name Control where instructions we create are recorded (typically for /// visiting again later during legalization). /// @{ + void recordInsertion(MachineInstr *InsertedInstr) const; void recordInsertions(std::function<void(MachineInstr *)> InsertedInstr); void stopRecordingInsertions(); /// @} /// Set the debug location to \p DL for all the next build instructions. - void setDebugLoc(const DebugLoc &DL) { this->DL = DL; } + void setDebugLoc(const DebugLoc &DL) { this->State.DL = DL; } /// Get the current instruction's debug location. - DebugLoc getDebugLoc() { return DL; } + DebugLoc getDebugLoc() { return State.DL; } /// Build and insert <empty> = \p Opcode <empty>. /// The insertion point is the one set by the last call of either @@ -156,20 +174,6 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildInstr(unsigned Opcode); - /// DAG like Generic method for building arbitrary instructions as above. - /// \Opc opcode for the instruction. - /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst - /// \Args Variadic list of uses of types(unsigned/MachineInstrBuilder) - /// Uses of type MachineInstrBuilder will perform - /// getOperand(0).getReg() to convert to register. - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, - UseArgsTy &&... Args) { - auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); - addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); - return MIB; - } - /// Build but don't insert <empty> = \p Opcode <empty>. /// /// \pre setMF, setBasicBlock or setMI must have been called. @@ -227,49 +231,6 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV); - /// Build and insert \p Res = G_ADD \p Op0, \p Op1 - /// - /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called.
- /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildAdd(unsigned Res, unsigned Op0, - unsigned Op1); - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { - unsigned Res = getDestFromArg(Ty); - return buildAdd(Res, (getRegFromArg(UseArgs))...); - } - - /// Build and insert \p Res = G_SUB \p Op0, \p Op1 - /// - /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildSub(unsigned Res, unsigned Op0, - unsigned Op1); - - /// Build and insert \p Res = G_MUL \p Op0, \p Op1 - /// - /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildMul(unsigned Res, unsigned Op0, - unsigned Op1); /// Build and insert \p Res = G_GEP \p Op0, \p Op1 /// @@ -338,34 +299,6 @@ public: MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0, unsigned Op1, unsigned CarryIn); - /// Build and insert \p Res = G_AND \p Op0, \p Op1 - /// - /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p - /// Op1. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildAnd(DstTy &&Dst, UseArgsTy &&... UseArgs) { - return buildAnd(getDestFromArg(Dst), getRegFromArg(UseArgs)...); - } - MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, - unsigned Op1); - - /// Build and insert \p Res = G_OR \p Op0, \p Op1 - /// - /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p - /// Op1. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); /// Build and insert \p Res = G_ANYEXT \p Op0 /// @@ -399,6 +332,10 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. + template <typename DstType, typename ArgType> + MachineInstrBuilder buildSExt(DstType &&Res, ArgType &&Arg) { + return buildSExt(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildSExt(unsigned Res, unsigned Op); /// Build and insert \p Res = G_ZEXT \p Op @@ -413,6 +350,10 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. 
+ template <typename DstType, typename ArgType> + MachineInstrBuilder buildZExt(DstType &&Res, ArgType &&Arg) { + return buildZExt(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildZExt(unsigned Res, unsigned Op); /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -423,6 +364,10 @@ public: /// \pre \p Op must be a generic virtual register with scalar or vector type. /// /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildSExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildSExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op); /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -433,6 +378,10 @@ public: /// \pre \p Op must be a generic virtual register with scalar or vector type. /// /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildZExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildZExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op); // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -462,6 +411,10 @@ public: unsigned Op); /// Build and insert an appropriate cast between two registers of equal size. + template <typename DstType, typename ArgType> + MachineInstrBuilder buildCast(DstType &&Res, ArgType &&Arg) { + return buildCast(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildCast(unsigned Dst, unsigned Src); /// Build and insert G_BR \p Dest @@ -471,7 +424,7 @@ public: /// \pre setBasicBlock or setMI must have been called. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildBr(MachineBasicBlock &BB); + MachineInstrBuilder buildBr(MachineBasicBlock &Dest); /// Build and insert G_BRCOND \p Tst, \p Dest /// @@ -485,7 +438,7 @@ public: /// depend on bit 0 (for now). /// /// \return The newly created instruction. - MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &BB); + MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &Dest); /// Build and insert G_BRINDIRECT \p Tgt /// @@ -532,8 +485,18 @@ public: /// \pre \p Res must be a generic virtual register with scalar type. /// /// \return The newly created instruction. + template <typename DstType> + MachineInstrBuilder buildFConstant(DstType &&Res, const ConstantFP &Val) { + return buildFConstant(getDestFromArg(Res), Val); + } MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val); + template <typename DstType> + MachineInstrBuilder buildFConstant(DstType &&Res, double Val) { + return buildFConstant(getDestFromArg(Res), Val); + } + MachineInstrBuilder buildFConstant(unsigned Res, double Val); + /// Build and insert \p Res = COPY Op /// /// Register-to-register COPY sets \p Res to \p Op. @@ -559,6 +522,18 @@ public: MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO); + /// Build and insert `Res = <opcode> Addr, MMO`. + /// + /// Loads the value stored at \p Addr. Puts the result in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildLoadInstr(unsigned Opcode, unsigned Res, + unsigned Addr, MachineMemOperand &MMO); + /// Build and insert `G_STORE Val, Addr, MMO`. /// /// Stores the value \p Val to \p Addr. @@ -580,7 +555,10 @@ MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index); /// Build and insert \p Res = IMPLICIT_DEF. - MachineInstrBuilder buildUndef(unsigned Dst); + template <typename DstType> MachineInstrBuilder buildUndef(DstType &&Res) { + return buildUndef(getDestFromArg(Res)); + } + MachineInstrBuilder buildUndef(unsigned Res); /// Build and insert instructions to put \p Ops together at the specified \p /// Indices to form a larger register. @@ -649,6 +627,10 @@ public: /// \pre \p Res must be smaller than \p Op /// /// \return The newly created instruction. + template <typename DstType, typename SrcType> + MachineInstrBuilder buildFPTrunc(DstType &&Res, SrcType &&Src) { + return buildFPTrunc(getDestFromArg(Res), getRegFromArg(Src)); + } MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op); /// Build and insert \p Res = G_TRUNC \p Op @@ -735,7 +717,28 @@ public: MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val, unsigned Idx); - /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, + /// Build and insert `OldValRes<def>, SuccessRes<def> = + /// G_ATOMIC_CMPXCHG_WITH_SUCCESS Addr, CmpVal, NewVal, MMO`. + /// + /// Atomically replace the value at \p Addr with \p NewVal if it is currently + /// \p CmpVal, otherwise leaves it unchanged. Puts the original value from \p + /// Addr in \p OldValRes, along with an s1 indicating whether it was replaced. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register of scalar type. + /// \pre \p SuccessRes must be a generic virtual register of scalar type. It + /// will be assigned 0 on failure and 1 on success. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, \p CmpVal, and \p NewVal must be generic virtual + /// registers of the same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder + buildAtomicCmpXchgWithSuccess(unsigned OldValRes, unsigned SuccessRes, + unsigned Addr, unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, /// MMO`. /// /// Atomically replace the value at \p Addr with \p NewVal if it is currently @@ -752,6 +755,328 @@ public: MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_<Opcode> Addr, Val, MMO`. + /// + /// Atomically read-modify-update the value at \p Addr with \p Val. Puts the + /// original value from \p Addr in \p OldValRes. The modification is + /// determined by the opcode. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMW(unsigned Opcode, unsigned OldValRes, + unsigned Addr, unsigned Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
+ /// + /// Atomically replace the value at \p Addr with \p Val. Puts the original + /// value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_ADD Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the addition of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_SUB Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the subtraction of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_AND Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise and of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_NAND Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise nand of \p Val + /// and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_OR Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise or of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_XOR Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise xor of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_MAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the signed maximum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_MIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the signed minimum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the unsigned maximum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. 
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the unsigned minimum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); +}; + +/// A CRTP class that contains methods for building instructions that can +/// be constant folded. MachineIRBuilders that want to inherit from this will +/// need to implement buildBinaryOp (for constant folding binary ops). +/// Alternatively, they can implement buildInstr(Opc, Dst, Uses...) to perform +/// additional folding for Opc. +template <typename Base> +class FoldableInstructionsBuilder : public MachineIRBuilderBase { + Base &base() { return static_cast<Base &>(*this); } + +public: + using MachineIRBuilderBase::MachineIRBuilderBase; + /// Build and insert \p Res = G_ADD \p Op0, \p Op1 + /// + /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAdd(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_ADD, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildAdd(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_SUB \p Op0, \p Op1 + /// + /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildSub(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_SUB, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildSub(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildSub(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_MUL \p Op0, \p Op1 + /// + /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildMul(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_MUL, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildMul(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildMul(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_AND \p Op0, \p Op1 + /// + /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAnd(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_AND, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAnd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildAnd(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_OR \p Op0, \p Op1 + /// + /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildOr(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_OR, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildOr(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildOr(Res, (base().getRegFromArg(UseArgs))...); + } +}; + +class MachineIRBuilder : public FoldableInstructionsBuilder<MachineIRBuilder> { +public: + using FoldableInstructionsBuilder< + MachineIRBuilder>::FoldableInstructionsBuilder; + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst, + unsigned Src0, unsigned Src1) { + validateBinaryOp(Dst, Src0, Src1); + return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1); + } + using FoldableInstructionsBuilder<MachineIRBuilder>::buildInstr; + /// DAG-like generic method for building arbitrary instructions as above. + /// \p Opc opcode for the instruction. + /// \p Ty Either LLT/TargetRegisterClass/unsigned types for Dst + /// \p Args Variadic list of uses of types (unsigned/MachineInstrBuilder). + /// Uses of type MachineInstrBuilder will perform + /// getOperand(0).getReg() to convert to register. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + return MIB; + } }; } // End namespace llvm.
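To see how the split classes fit together, a minimal usage sketch (editorial, not from the patch; MF, MBB, and the 32-bit generic vregs Lo and Hi are assumed to exist in a GlobalISel context):

MachineIRBuilder B(MF);        // all mutable state lives in MachineIRBuilderState
B.setInsertPt(MBB, MBB.end());
LLT S32 = LLT::scalar(32);
// The DstTy overload creates the destination vreg itself and dispatches
// through the CRTP buildBinaryOp, which runs validateBinaryOp first.
auto Sum = B.buildAdd(S32, Lo, Hi);
// A MachineInstrBuilder can be fed straight back in as a use;
// getRegFromArg extracts its operand 0 (the def) behind the scenes.
auto Wide = B.buildZExt(LLT::scalar(64), Sum);
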
diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index 676955c33fe9..c53ae416e60b 100644 --- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -22,7 +22,7 @@ /// of an instruction should live. It asks the target which banks may be /// used for each operand of the instruction and what is the cost. Then, /// it chooses the solution which minimizes the cost of the instruction plus -/// the cost of any move that may be needed to to the values into the right +/// the cost of any move that may be needed to get the values into the right /// register bank. /// In other words, the cost for an instruction on a register bank RegBank /// is: Cost of I on RegBank plus the sum of the cost for bringing the diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/include/llvm/CodeGen/GlobalISel/RegisterBank.h index 5d758423f4e7..d5612e17393c 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBank.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBank.h @@ -42,7 +42,7 @@ private: public: RegisterBank(unsigned ID, const char *Name, unsigned Size, - const uint32_t *ContainedRegClasses, unsigned NumRegClasses); + const uint32_t *CoveredClasses, unsigned NumRegClasses); /// Get the identifier of this register bank. unsigned getID() const { return ID; } diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 02868b220984..82fd7eddb68a 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -622,6 +622,8 @@ public: /// \pre \p Reg is a virtual register that either has a bank or a class. /// \returns The constrained register class, or nullptr if there is none. /// \note This is a generic variant of MachineRegisterInfo::constrainRegClass + /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel + /// purpose, including non-select passes of GlobalISel static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI); diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 5864c15cc8eb..51e3a2732972 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -19,8 +19,10 @@ namespace llvm { +class AnalysisUsage; class MachineFunction; class MachineInstr; +class MachineOperand; class MachineOptimizationRemarkEmitter; class MachineOptimizationRemarkMissed; class MachineRegisterInfo; @@ -32,6 +34,7 @@ class TargetRegisterInfo; class TargetRegisterClass; class Twine; class ConstantFP; +class APFloat; /// Try to constrain Reg to the specified register class. If this fails, /// create a new virtual register in the correct class and insert a COPY before @@ -57,8 +60,21 @@ unsigned constrainOperandRegClass(const MachineFunction &MF, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - unsigned Reg, unsigned OpIdx); + const MachineOperand &RegMO, unsigned OpIdx); +/// Mutate the newly-selected instruction \p I to constrain its (possibly +/// generic) virtual register operands to the instruction's register class. +/// This could involve inserting COPYs before (for uses) or after (for defs). +/// This requires the number of operands to match the instruction description. +/// \returns whether operand regclass constraining succeeded.
+/// +// FIXME: Not all instructions have the same number of operands. We should +// probably expose a constrain helper per operand and let the target selector +// constrain individual registers, like fast-isel. +bool constrainSelectedInstRegOperands(MachineInstr &I, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI); /// Check whether an instruction \p MI is dead: it only defines dead virtual /// registers, and doesn't have other side effects. bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI); @@ -85,5 +101,12 @@ const ConstantFP* getConstantFPVRegVal(unsigned VReg, MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg, const MachineRegisterInfo &MRI); +/// Returns an APFloat from Val converted to the appropriate size. +APFloat getAPFloatFromSize(double Val, unsigned Size); + +/// Modify analysis usage so it preserves passes required for the SelectionDAG +/// fallback. +void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index d256849be9af..80bd796d5374 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -377,6 +377,8 @@ namespace ISD { /// When the 1st operand is a vector, the shift amount must be in the same /// type. (TLI.getShiftAmountTy() will return the same type when the input /// type is a vector.) + /// For rotates, the shift amount is treated as an unsigned amount modulo + /// the element size of the first operand. SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. @@ -412,19 +414,11 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, and - /// op #2 is a *carry value*. This operator checks the result of - /// "LHS - RHS - Carry", and can be used to compare two wide integers: - /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. - /// FIXME: This node is deprecated in favor of SETCCCARRY. - /// It is kept around for now to provide a smooth transition path - /// toward the use of SETCCCARRY and will eventually be removed. - SETCCE, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but /// op #2 is a boolean indicating if there is an incoming carry. This /// operator checks the result of "LHS - RHS - Carry", and can be used to - /// compare two wide integers: (setcce lhshi rhshi (subc lhslo rhslo) cc). + /// compare two wide integers: + /// (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc). /// Only valid for integers. SETCCCARRY, @@ -495,7 +489,8 @@ namespace ISD { ZERO_EXTEND_VECTOR_INREG, /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned - /// integer. + /// integer. These have the same semantics as fptosi and fptoui in IR. If + /// the FP value cannot fit in the integer type, the results are undefined. 
FP_TO_SINT, FP_TO_UINT, @@ -779,6 +774,7 @@ ATOMIC_LOAD_ADD, ATOMIC_LOAD_SUB, ATOMIC_LOAD_AND, + ATOMIC_LOAD_CLR, ATOMIC_LOAD_OR, ATOMIC_LOAD_XOR, ATOMIC_LOAD_NAND, diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index 988e6d6cb3a3..9b8d83ce77ca 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Config/llvm-config.h" namespace llvm { class LatencyPriorityQueue; @@ -26,7 +27,7 @@ namespace llvm { LatencyPriorityQueue *PQ; explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(const SUnit* LHS, const SUnit* RHS) const; }; class LatencyPriorityQueue : public SchedulingPriorityQueue { @@ -83,11 +84,15 @@ namespace llvm { void remove(SUnit *SU) override; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override; +#endif + // scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. - void scheduledNode(SUnit *Node) override; + void scheduledNode(SUnit *SU) override; private: void AdjustPriorityOfUnscheduledPreds(SUnit *SU); diff --git a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h index 848ee1dc0dc6..221f16a03f16 100644 --- a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" namespace llvm { -/// \brief This is an alternative analysis pass to MachineBlockFrequencyInfo. +/// This is an alternative analysis pass to MachineBlockFrequencyInfo. /// The difference is that with this pass, the block frequencies are not /// computed when the analysis pass is executed but rather when the BFI result /// is explicitly requested by the analysis client. @@ -49,7 +49,7 @@ private: /// The function. MachineFunction *MF = nullptr; - /// \brief Calculate MBFI and all other analyses that's not available and + /// Calculate MBFI and all other analyses that are not available and /// required by BFI. MachineBlockFrequencyInfo &calculateIfNotAvailable() const; @@ -58,10 +58,10 @@ public: LazyMachineBlockFrequencyInfoPass(); - /// \brief Compute and return the block frequencies. + /// Compute and return the block frequencies. MachineBlockFrequencyInfo &getBFI() { return calculateIfNotAvailable(); } - /// \brief Compute and return the block frequencies. + /// Compute and return the block frequencies. const MachineBlockFrequencyInfo &getBFI() const { return calculateIfNotAvailable(); } diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index f4fa872c7f5b..cdf9ad2588cf 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -326,7 +326,7 @@ namespace llvm { /// createDeadDef - Make sure the range has a value defined at Def. /// If one already exists, return it. Otherwise allocate a new value and /// add liveness for a dead def. - VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator); + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc); /// Create a def of value @p VNI.
Return @p VNI. If there already exists /// a definition at VNI->def, the value defined there must be @p VNI. @@ -454,7 +454,7 @@ namespace llvm { /// overlapsFrom - Return true if the intersection of the two live ranges /// is not empty. The specified iterator is a hint that we can begin /// scanning the Other range starting at I. - bool overlapsFrom(const LiveRange &Other, const_iterator I) const; + bool overlapsFrom(const LiveRange &Other, const_iterator StartPos) const; /// Returns true if all segments of the @p Other live range are completely /// covered by this live range. @@ -482,7 +482,7 @@ namespace llvm { /// @p Use, return {nullptr, false}. If there is an "undef" before @p Use, /// return {nullptr, true}. std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs, - SlotIndex StartIdx, SlotIndex Use); + SlotIndex StartIdx, SlotIndex Kill); /// Simplified version of the above "extendInBlock", which assumes that /// no register lanes are undefined by <def,read-undef> operands. @@ -609,7 +609,7 @@ namespace llvm { void print(raw_ostream &OS) const; void dump() const; - /// \brief Walk the range and assert if any invariants fail to hold. + /// Walk the range and assert if any invariants fail to hold. /// /// Note that this is a no-op when asserts are disabled. #ifdef NDEBUG @@ -791,7 +791,7 @@ namespace llvm { /// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod /// function will be applied to the L0010 and L0008 subranges. void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, - std::function<void(LiveInterval::SubRange&)> Mod); + std::function<void(LiveInterval::SubRange&)> Apply); bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); @@ -802,7 +802,7 @@ namespace llvm { void print(raw_ostream &OS) const; void dump() const; - /// \brief Walks the interval and assert if any invariants fail to hold. + /// Walks the interval and assert if any invariants fail to hold. /// /// Note that this is a no-op when asserts are disabled. #ifdef NDEBUG diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h index b922e543c856..9e2799bd4414 100644 --- a/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/include/llvm/CodeGen/LiveIntervalUnion.h @@ -154,7 +154,7 @@ public: unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()); // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VReg) const; + bool isSeenInterference(LiveInterval *VirtReg) const; // Did collectInterferingVRegs collect all interferences? bool seenAllInterferences() const { return SeenAllInterferences; } diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 1150f3c1c47b..291a07a712cb 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -105,7 +105,7 @@ class VirtRegMap; /// Calculate the spill weight to assign to a single instruction. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineInstr &Instr); + const MachineInstr &MI); /// Calculate the spill weight to assign to a single instruction. 
static float getSpillWeight(bool isDef, bool isUse, @@ -462,6 +462,10 @@ class VirtRegMap; void computeRegUnitRange(LiveRange&, unsigned Unit); void computeVirtRegInterval(LiveInterval&); + using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>; + void extendSegmentsToUses(LiveRange &Segments, + ShrinkToUsesWorkList &WorkList, unsigned Reg, + LaneBitmask LaneMask); /// Helper function for repairIntervalsInRange(), walks backwards and /// creates/modifies live segments in \p LR to match the operands found. diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index f9aab0d09e1f..301a45066b4c 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -44,7 +44,7 @@ class MachineOperand; class MachineRegisterInfo; class raw_ostream; -/// \brief A set of physical registers with utility functions to track liveness +/// A set of physical registers with utility functions to track liveness /// when walking backward/forward through a basic block. class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; @@ -84,7 +84,7 @@ public: LiveRegs.insert(*SubRegs); } - /// \brief Removes a physical register, all its sub-registers, and all its + /// Removes a physical register, all its sub-registers, and all its /// super-registers from the set. void removeReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); @@ -98,7 +98,7 @@ public: SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers = nullptr); - /// \brief Returns true if register \p Reg is contained in the set. This also + /// Returns true if register \p Reg is contained in the set. This also /// works if only the super register of \p Reg has been defined, because /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() @@ -155,7 +155,7 @@ public: void dump() const; private: - /// \brief Adds live-in registers from basic block \p MBB, taking associated + /// Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); @@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// \brief Computes registers live-in to \p MBB assuming all of its successors +/// Computes registers live-in to \p MBB assuming all of its successors /// live-in lists are up-to-date. Puts the result into the given LivePhysReg /// instance \p LiveRegs. void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB); @@ -185,6 +185,13 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); +/// Convenience function for recomputing live-in's for \p MBB. +static inline void recomputeLiveIns(MachineBasicBlock &MBB) { + LivePhysRegs LPR; + MBB.clearLiveIns(); + computeAndAddLiveIns(LPR, MBB); +} + } // end namespace llvm #endif // LLVM_CODEGEN_LIVEPHYSREGS_H diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 84bccde0caa2..53830297c525 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -117,10 +117,13 @@ private: /// registers are created. 
void MRI_NoteNewVirtualRegister(unsigned VReg) override; - /// \brief Check if MachineOperand \p MO is a last use/kill either in the + /// Check if MachineOperand \p MO is a last use/kill either in the /// main live range of \p LI or in one of the matching subregister ranges. bool useIsKill(const LiveInterval &LI, const MachineOperand &MO) const; + /// Create a new empty interval based on OldReg. + LiveInterval &createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -174,16 +177,13 @@ public: return makeArrayRef(NewRegs).slice(FirstNew); } - /// createEmptyIntervalFrom - Create a new empty interval based on OldReg. - LiveInterval &createEmptyIntervalFrom(unsigned OldReg); - /// createFrom - Create a new virtual register based on OldReg. unsigned createFrom(unsigned OldReg); /// create - Create a new register with the same class and original slot as /// parent. LiveInterval &createEmptyInterval() { - return createEmptyIntervalFrom(getReg()); + return createEmptyIntervalFrom(getReg(), true); } unsigned create() { return createFrom(getReg()); } @@ -233,12 +233,6 @@ public: return Rematted.count(ParentVNI); } - void markDeadRemat(MachineInstr *inst) { - // DeadRemats is an optional field. - if (DeadRemats) - DeadRemats->insert(inst); - } - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. void eraseVirtReg(unsigned Reg); diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h index fa6827f6b1f9..f62a55c73085 100644 --- a/include/llvm/CodeGen/LiveRegMatrix.h +++ b/include/llvm/CodeGen/LiveRegMatrix.h @@ -107,6 +107,13 @@ public: /// with the highest enum value is returned. InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + /// Check for interference in the segment [Start, End) that may prevent + /// assignment to PhysReg. If this function returns true, there is + /// interference in the segment [Start, End) of some other interval already + /// assigned to PhysReg. If this function returns false, PhysReg is free at + /// the segment [Start, End). + bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg); + /// Assign VirtReg to PhysReg. /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg.
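A sketch of the new segment-based query (editorial, not from the patch; LIS, Matrix, FirstMI, LastMI, and PhysReg stand in for an assumed register-allocation context):

// Ask whether PhysReg is free over the half-open range [Start, End)
// without naming a particular LiveInterval, unlike the original overload.
SlotIndex Start = LIS.getInstructionIndex(FirstMI);
SlotIndex End = LIS.getInstructionIndex(LastMI);
if (!Matrix.checkInterference(Start, End, PhysReg)) {
  // No interval assigned to PhysReg overlaps [Start, End); it is safe
  // to place a value there for this range.
}
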
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index dc4956da9637..249545906e01 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,6 +16,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" @@ -40,6 +41,36 @@ public: init(TRI); } + /// For a machine instruction \p MI, adds all register units used to + /// \p UsedRegUnits and all units defined or clobbered to \p ModifiedRegUnits. + /// This is useful when walking over a range of instructions to track + /// registers used or defined separately. + static void accumulateUsedDefed(const MachineInstr &MI, + LiveRegUnits &ModifiedRegUnits, + LiveRegUnits &UsedRegUnits, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (O->isRegMask()) + ModifiedRegUnits.addRegsInMask(O->getRegMask()); + if (!O->isReg()) + continue; + unsigned Reg = O->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (O->isDef()) { + // Some architectures (e.g. AArch64 XZR/WZR) have registers that are + // constant and may be used as destinations to indicate the generated + // value is discarded. No need to track such a case as a def. + if (!TRI->isConstantPhysReg(Reg)) + ModifiedRegUnits.addReg(Reg); + } else { + assert(O->isUse() && "Reg operand not a def and not a use"); + UsedRegUnits.addReg(Reg); + } + } + return; + } + /// Initialize and clear the set. void init(const TargetRegisterInfo &TRI) { this->TRI = &TRI; @@ -59,7 +90,7 @@ Units.set(*Unit); } - /// \brief Adds register units covered by physical register \p Reg that are + /// Adds register units covered by physical register \p Reg that are /// part of the lanemask \p Mask. void addRegMasked(unsigned Reg, LaneBitmask Mask) { for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
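The accumulate helper is meant to be called once per instruction while scanning a range; a minimal sketch (assuming a block MBB and a TargetRegisterInfo *TRI from the current subtarget, both outside the patch):

LiveRegUnits ModifiedRegUnits(*TRI), UsedRegUnits(*TRI);
for (const MachineInstr &MI : MBB)
  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Afterwards ModifiedRegUnits holds every unit written (or clobbered by a
// regmask) in MBB, and UsedRegUnits every unit read, tracked separately.
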
+/// We call a block that is ready for its final round of processing `done` +/// (isBlockDone), e.g. when all predecessor information is known. +/// +/// Note that a naive traversal order would be to do two complete passes over +/// all basic blocks/instructions, the first for recording clearances, the +/// second for updating clearance based on backedges. +/// However, for functions without backedges, or functions with a lot of +/// straight-line code, and a small loop, that would be a lot of unnecessary +/// work (since only the BBs that are part of the loop require two passes). +/// +/// E.g., the naive iteration order for the above example is as follows: +/// Naive: PH A B C D A' B' C' D' +/// +/// In the optimized approach we avoid processing D twice, because we +/// can entirely process the predecessors before getting to D. class LoopTraversal { private: struct MBBInfo { /// Whether we have gotten to this block in primary processing yet. bool PrimaryCompleted = false; /// The number of predecessors for which primary processing has completed unsigned IncomingProcessed = 0; /// The value of `IncomingProcessed` at the start of primary processing unsigned PrimaryIncoming = 0; /// The number of predecessors for which all processing steps are done. unsigned IncomingCompleted = 0; MBBInfo() = default; }; using MBBInfoMap = SmallVector<MBBInfo, 4>; /// Helps keep track of whether we processed this block and all its predecessors. MBBInfoMap MBBInfos; public: struct TraversedMBBInfo { /// The basic block. MachineBasicBlock *MBB = nullptr; /// True if this is the first time we process the basic block. bool PrimaryPass = true; /// True if the block is ready for its final round of processing. bool IsDone = true; TraversedMBBInfo(MachineBasicBlock *BB = nullptr, bool Primary = true, bool Done = true) : MBB(BB), PrimaryPass(Primary), IsDone(Done) {} }; LoopTraversal() {} /// Identifies basic blocks that are part of loops and should be /// visited twice and returns an efficient traversal order for all the blocks. typedef SmallVector<TraversedMBBInfo, 4> TraversalOrder; TraversalOrder traverse(MachineFunction &MF); private: /// Returns true if the block is ready for its final round of processing. bool isBlockDone(MachineBasicBlock *MBB); }; } // namespace llvm #endif // LLVM_CODEGEN_LOOPTRAVERSAL_H diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index b631a8c0122a..e199a1f69ad7 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -45,7 +45,7 @@ public: /// \returns nullptr if a parsing error occurred. std::unique_ptr<Module> parseIRModule(); - /// \brief Parses MachineFunctions in the MIR file and add them to the given + /// Parses MachineFunctions in the MIR file and adds them to the given /// MachineModuleInfo \p MMI. /// /// \returns true if an error occurred. diff --git a/include/llvm/CodeGen/MIRPrinter.h b/include/llvm/CodeGen/MIRPrinter.h index c73adc3f2b11..078c4b2f6072 100644 --- a/include/llvm/CodeGen/MIRPrinter.h +++ b/include/llvm/CodeGen/MIRPrinter.h @@ -38,7 +38,7 @@ void printMIR(raw_ostream &OS, const MachineFunction &MF); /// this function and the parser will use this function to construct a list if /// it is missing. 
void guessSuccessors(const MachineBasicBlock &MBB, - SmallVectorImpl<MachineBasicBlock*> &Successors, + SmallVectorImpl<MachineBasicBlock*> &Result, bool &IsFallthrough); } // end namespace llvm diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index ba40e522e261..7f46406c4789 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -258,11 +258,11 @@ template <> struct MappingTraits<MachineStackObject> { YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, true); YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); - YamlIO.mapOptional("di-variable", Object.DebugVar, + YamlIO.mapOptional("debug-info-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("di-expression", Object.DebugExpr, + YamlIO.mapOptional("debug-info-expression", Object.DebugExpr, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("di-location", Object.DebugLoc, + YamlIO.mapOptional("debug-info-location", Object.DebugLoc, StringValue()); // Don't print it out when it's empty. } @@ -283,6 +283,9 @@ struct FixedMachineStackObject { bool IsAliased = false; StringValue CalleeSavedRegister; bool CalleeSavedRestored = true; + StringValue DebugVar; + StringValue DebugExpr; + StringValue DebugLoc; bool operator==(const FixedMachineStackObject &Other) const { return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && @@ -290,7 +293,9 @@ struct FixedMachineStackObject { StackID == Other.StackID && IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && CalleeSavedRegister == Other.CalleeSavedRegister && - CalleeSavedRestored == Other.CalleeSavedRestored; + CalleeSavedRestored == Other.CalleeSavedRestored && + DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr + && DebugLoc == Other.DebugLoc; } }; @@ -321,6 +326,12 @@ template <> struct MappingTraits<FixedMachineStackObject> { StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, true); + YamlIO.mapOptional("debug-info-variable", Object.DebugVar, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("debug-info-expression", Object.DebugExpr, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("debug-info-location", Object.DebugLoc, + StringValue()); // Don't print it out when it's empty. } static const bool flow = true; @@ -417,6 +428,7 @@ struct MachineFrameInfo { bool HasOpaqueSPAdjustment = false; bool HasVAStart = false; bool HasMustTailInVarArgFunc = false; + unsigned LocalFrameSize = 0; StringValue SavePoint; StringValue RestorePoint; @@ -434,6 +446,7 @@ struct MachineFrameInfo { HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment && HasVAStart == Other.HasVAStart && HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc && + LocalFrameSize == Other.LocalFrameSize && SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint; } }; @@ -457,6 +470,7 @@ template <> struct MappingTraits<MachineFrameInfo> { YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false); YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc, false); + YamlIO.mapOptional("localFrameSize", MFI.LocalFrameSize, (unsigned)0); YamlIO.mapOptional("savePoint", MFI.SavePoint, StringValue()); // Don't print it out when it's empty. 
YamlIO.mapOptional("restorePoint", MFI.RestorePoint, @@ -472,6 +486,7 @@ struct MachineFunction { bool Legalized = false; bool RegBankSelected = false; bool Selected = false; + bool FailedISel = false; // Register information bool TracksRegLiveness = false; std::vector<VirtualRegisterDefinition> VirtualRegisters; @@ -495,6 +510,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); YamlIO.mapOptional("selected", MF.Selected, false); + YamlIO.mapOptional("failedISel", MF.FailedISel, false); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); YamlIO.mapOptional("registers", MF.VirtualRegisters, std::vector<VirtualRegisterDefinition>()); diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 0c9110cbaa87..ace33efd8713 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -58,7 +58,7 @@ private: public: void addNodeToList(MachineInstr *N); void removeNodeFromList(MachineInstr *N); - void transferNodesFromList(ilist_traits &OldList, instr_iterator First, + void transferNodesFromList(ilist_traits &FromList, instr_iterator First, instr_iterator Last); void deleteNode(MachineInstr *MI); }; @@ -115,13 +115,18 @@ private: /// branch. bool AddressTaken = false; + /// Indicate that this basic block is the entry block of an EH scope, i.e., + /// the block that used to have a catchpad or cleanuppad instruction in the + /// LLVM IR. + bool IsEHScopeEntry = false; + /// Indicate that this basic block is the entry block of an EH funclet. bool IsEHFuncletEntry = false; /// Indicate that this basic block is the entry block of a cleanup funclet. bool IsCleanupFuncletEntry = false; - /// \brief since getSymbol is a relatively heavy-weight operation, the symbol + /// since getSymbol is a relatively heavy-weight operation, the symbol /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; @@ -225,6 +230,14 @@ public: return make_range(getFirstTerminator(), end()); } + /// Returns a range that iterates over the phis in the basic block. + inline iterator_range<iterator> phis() { + return make_range(begin(), getFirstNonPHI()); + } + inline iterator_range<const_iterator> phis() const { + return const_cast<MachineBasicBlock *>(this)->phis(); + } + // Machine-CFG iterators using pred_iterator = std::vector<MachineBasicBlock *>::iterator; using const_pred_iterator = std::vector<MachineBasicBlock *>::const_iterator; @@ -367,6 +380,14 @@ public: bool hasEHPadSuccessor() const; + /// Returns true if this is the entry block of an EH scope, i.e., the block + /// that used to have a catchpad or cleanuppad instruction in the LLVM IR. + bool isEHScopeEntry() const { return IsEHScopeEntry; } + + /// Indicates if this is the entry block of an EH scope, i.e., the block that + /// that used to have a catchpad or cleanuppad instruction in the LLVM IR. + void setIsEHScopeEntry(bool V = true) { IsEHScopeEntry = V; } + /// Returns true if this is the entry block of an EH funclet. bool isEHFuncletEntry() const { return IsEHFuncletEntry; } @@ -449,6 +470,18 @@ public: /// Replace successor OLD with NEW and update probability info. void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Copy a successor (and any probability info) from original block to this + /// block's. Uses an iterator into the original blocks successors. 
+ /// + /// This is useful when doing a partial clone of successors. Afterward, the + /// probabilities may need to be normalized. + void copySuccessor(MachineBasicBlock *Orig, succ_iterator I); + + /// Split the old successor into old plus new and update the probability + /// info. + void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, + bool NormalizeSuccProbs = false); + /// Transfers all the successors from MBB to this machine basic block (i.e., /// copies all the successors of FromMBB and removes all the successors from /// FromMBB). @@ -546,7 +579,7 @@ public: /// Check if the edge between this block and the given successor \p /// Succ can be split. If this returns true a subsequent call to /// SplitCriticalEdge is guaranteed to return a valid basic block if - /// no changes occured in the meantime. + /// no changes occurred in the meantime. bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const; void pop_front() { Insts.pop_front(); } @@ -685,12 +718,19 @@ public: bool IsCond); /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE - /// instructions. Return UnknownLoc if there is none. + /// and DBG_LABEL instructions. Return UnknownLoc if there is none. DebugLoc findDebugLoc(instr_iterator MBBI); DebugLoc findDebugLoc(iterator MBBI) { return findDebugLoc(MBBI.getInstrIterator()); } + /// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE + /// instructions. Return UnknownLoc if there is none. + DebugLoc findPrevDebugLoc(instr_iterator MBBI); + DebugLoc findPrevDebugLoc(iterator MBBI) { + return findPrevDebugLoc(MBBI.getInstrIterator()); + } + /// Find and return the merged DebugLoc of the branch instructions of the /// block. Return UnknownLoc if there is none. DebugLoc findBranchDebugLoc(); @@ -717,9 +757,10 @@ public: // Debugging methods. void dump() const; - void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; + void print(raw_ostream &OS, const SlotIndexes * = nullptr, + bool IsStandalone = true) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, - const SlotIndexes* = nullptr) const; + const SlotIndexes * = nullptr, bool IsStandalone = true) const; // Printing method used by LoopInfo. void printAsOperand(raw_ostream &OS, bool PrintType = true) const; @@ -874,7 +915,7 @@ public: /// const_instr_iterator} and the respective reverse iterators. template<typename IterT> inline IterT skipDebugInstructionsForward(IterT It, IterT End) { - while (It != End && It->isDebugValue()) + while (It != End && It->isDebugInstr()) It++; return It; } @@ -885,7 +926,7 @@ inline IterT skipDebugInstructionsForward(IterT It, IterT End) { /// const_instr_iterator} and the respective reverse iterators. template<class IterT> inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) { - while (It != Begin && It->isDebugValue()) + while (It != Begin && It->isDebugInstr()) It--; return It; } diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 1705a0f7e59b..b0b5420a884b 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -63,7 +63,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, /// This class is a data container for one entry in a MachineConstantPool. /// It contains a pointer to the value and an offset from the start of /// the constant pool. -/// @brief An entry in a MachineConstantPool +/// An entry in a MachineConstantPool class MachineConstantPoolEntry { public: /// The constant itself. 
@@ -117,7 +117,7 @@ public: /// the use of MO_ConstantPoolIndex values. When emitting assembly or machine /// code, these virtual address references are converted to refer to the /// address of the function constant pool values. -/// @brief The machine constant pool. +/// The machine constant pool. class MachineConstantPool { unsigned PoolAlignment; ///< The alignment for the pool. std::vector<MachineConstantPoolEntry> Constants; ///< The pool of constants. @@ -128,7 +128,7 @@ class MachineConstantPool { const DataLayout &getDataLayout() const { return DL; } public: - /// @brief The only constructor. + /// The only constructor. explicit MachineConstantPool(const DataLayout &DL) : PoolAlignment(1), DL(DL) {} ~MachineConstantPool(); diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h index ffbcc62bfa36..75d75bc3669a 100644 --- a/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -37,9 +37,9 @@ public: MachineDominanceFrontier(); - DominanceFrontierBase<MachineBasicBlock, false> &getBase() { return Base; } + ForwardDominanceFrontierBase<MachineBasicBlock> &getBase() { return Base; } - const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return Base.getRoots(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 98fdb51aae2f..e3d3d169db97 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -45,7 +45,7 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { - /// \brief Helper structure used to hold all the basic blocks + /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. struct CriticalEdge { MachineBasicBlock *FromBB; @@ -53,12 +53,12 @@ class MachineDominatorTree : public MachineFunctionPass { MachineBasicBlock *NewBB; }; - /// \brief Pile up all the critical edges to be split. + /// Pile up all the critical edges to be split. /// The splitting of a critical edge is local and thus, it is possible /// to apply several of those changes at the same time. mutable SmallVector<CriticalEdge, 32> CriticalEdgesToSplit; - /// \brief Remember all the basic blocks that are inserted during + /// Remember all the basic blocks that are inserted during /// edge splitting. /// Invariant: NewBBs == all the basic blocks contained in the NewBB /// field of all the elements of CriticalEdgesToSplit. @@ -69,7 +69,7 @@ class MachineDominatorTree : public MachineFunctionPass { /// The DominatorTreeBase that is used to compute a normal dominator tree std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT; - /// \brief Apply all the recorded critical edges to the DT. + /// Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses /// the fast query path of DT as much as possible. /// @@ -228,7 +228,7 @@ public: void print(raw_ostream &OS, const Module*) const override; - /// \brief Record that the critical edge (FromBB, ToBB) has been + /// Record that the critical edge (FromBB, ToBB) has been /// split with NewBB. 
/// It is best to use this method instead of directly updating the /// underlying information, because this helps mitigate the @@ -249,12 +249,6 @@ public: "A basic block inserted via edge splitting cannot appear twice"); CriticalEdgesToSplit.push_back({FromBB, ToBB, NewBB}); } - - /// \brief Verify the correctness of the domtree by re-computing it. - /// - /// This should only be used for debugging as it aborts the program if the - /// verification fails. - void verifyDomTree() const; }; //===------------------------------------- diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index f887517217e1..2d6081f3577d 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -85,9 +85,23 @@ public: /// stack offsets of the object, eliminating all MO_FrameIndex operands from /// the program. /// -/// @brief Abstract Stack Frame Information +/// Abstract Stack Frame Information class MachineFrameInfo { +public: + /// Stack Smashing Protection (SSP) rules require that vulnerable stack + /// allocations are located close to the stack protector. + enum SSPLayoutKind { + SSPLK_None, ///< Did not trigger a stack protector. No effect on data + ///< layout. + SSPLK_LargeArray, ///< Array or nested array >= SSP-buffer-size. Closest + ///< to the stack protector. + SSPLK_SmallArray, ///< Array or nested array < SSP-buffer-size. 2nd closest + ///< to the stack protector. + SSPLK_AddrOf ///< The address of this allocation is exposed and + ///< triggered protection. 3rd closest to the protector. + }; +private: // Represent a single object allocated on the stack. struct StackObject { // The offset of this object from the stack pointer on entry to @@ -123,6 +137,9 @@ class MachineFrameInfo { /// necessarily reside in the same contiguous memory block as other stack /// objects. Objects with differing stack IDs should not be merged or /// substituted for each other. + /// + /// It is assumed a target uses consecutive, increasing stack IDs starting + /// from 1. uint8_t StackID; /// If this stack object originated from an Alloca instruction @@ -145,12 +162,15 @@ /// If true, the object has been zero-extended. bool isSExt = false; + uint8_t SSPLayout; + StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) : SPOffset(SPOffset), Size(Size), Alignment(Alignment), isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), - StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} + StackID(StackID), Alloca(Alloca), isAliased(IsAliased), + SSPLayout(SSPLK_None) {} }; /// The alignment of the stack. @@ -485,6 +505,20 @@ public: Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset; } + SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return (SSPLayoutKind)Objects[ObjectIdx+NumFixedObjects].SSPLayout; + } + + void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + assert(!isDeadObjectIndex(ObjectIdx) && + "Setting SSP layout for a dead object?"); + Objects[ObjectIdx+NumFixedObjects].SSPLayout = Kind; + } + /// Return the number of bytes that must be allocated to hold /// all of the fixed size frame objects. This is only valid after /// Prolog/Epilog code insertion has finalized the stack frame layout. 
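The SSPLayoutKind markings and the getObjectSSPLayout/setObjectSSPLayout accessors above are meant to be driven by a stack-protector analysis. A minimal sketch of the intended usage follows; the classifyForSSP helper, its IsLargeArray parameter, and the FrameIdx value are illustrative assumptions, not part of this patch:

// Sketch only: classify one frame object for stack-protector layout.
void classifyForSSP(llvm::MachineFrameInfo &MFI, int FrameIdx,
                    bool IsLargeArray) {
  using MFI_t = llvm::MachineFrameInfo;
  if (MFI.isDeadObjectIndex(FrameIdx))
    return; // Dead objects must not carry an SSP layout (see assert above).
  MFI.setObjectSSPLayout(FrameIdx, IsLargeArray ? MFI_t::SSPLK_LargeArray
                                                : MFI_t::SSPLK_SmallArray);
  // Frame layout can later sort objects so that SSPLK_LargeArray lands
  // closest to the protector, then SSPLK_SmallArray, then SSPLK_AddrOf.
  assert(MFI.getObjectSSPLayout(FrameIdx) != MFI_t::SSPLK_None);
}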
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 7d8b7ebe8d62..e8a4d529faac 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -73,6 +73,7 @@ class SlotIndexes; class TargetMachine; class TargetRegisterClass; class TargetSubtargetInfo; +struct WasmEHFuncInfo; struct WinEHFuncInfo; template <> struct ilist_alloc_traits<MachineBasicBlock> { @@ -80,8 +81,8 @@ template <> struct ilist_callback_traits<MachineBasicBlock> { - void addNodeToList(MachineBasicBlock* MBB); - void removeNodeFromList(MachineBasicBlock* MBB); + void addNodeToList(MachineBasicBlock* N); + void removeNodeFromList(MachineBasicBlock* N); template <class Iterator> void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) { @@ -96,7 +97,7 @@ template <> struct ilist_callback_traits<MachineBasicBlock> { struct MachineFunctionInfo { virtual ~MachineFunctionInfo(); - /// \brief Factory function: default behavior is to call new using the + /// Factory function: default behavior is to call new using the /// supplied allocator. /// /// This function can be overridden in a derived class. @@ -245,6 +246,10 @@ class MachineFunction { // Keep track of jump tables for switch instructions MachineJumpTableInfo *JumpTableInfo; + // Keeps track of Wasm exception handling related data. This will be null for + // functions that aren't using a wasm EH personality. + WasmEHFuncInfo *WasmEHInfo = nullptr; + // Keeps track of Windows exception handling related data. This will be null // for functions that aren't using a funclet-based EH personality. WinEHFuncInfo *WinEHInfo = nullptr; @@ -319,6 +324,7 @@ class MachineFunction { bool CallsEHReturn = false; bool CallsUnwindInit = false; + bool HasEHScopes = false; bool HasEHFunclets = false; /// List of C++ TypeInfo used. @@ -349,17 +355,18 @@ public: struct VariableDbgInfo { const DILocalVariable *Var; const DIExpression *Expr; - unsigned Slot; + // The Slot can be negative for fixed stack objects. + int Slot; const DILocation *Loc; VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, - unsigned Slot, const DILocation *Loc) + int Slot, const DILocation *Loc) : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {} }; using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function &F, const TargetMachine &TM, + MachineFunction(const Function &F, const TargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; @@ -430,6 +437,12 @@ public: MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } + /// getWasmEHFuncInfo - Return information about how the current function uses + /// Wasm exception handling. Returns null for functions that don't use wasm + /// exception handling. + const WasmEHFuncInfo *getWasmEHFuncInfo() const { return WasmEHInfo; } + WasmEHFuncInfo *getWasmEHFuncInfo() { return WasmEHInfo; } + /// getWinEHFuncInfo - Return information about how the current function uses /// Windows exception handling. Returns null for functions that don't use /// funclets for exception handling. 
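The MachineFunctionInfo factory described above is the hook targets use to attach per-function state to a MachineFunction. A minimal sketch of the usual pattern; TargetFooFunctionInfo, its field, and getVarArgsFI are hypothetical names introduced only for illustration:

// Hypothetical target-private function info, allocated via the factory.
struct TargetFooFunctionInfo : public llvm::MachineFunctionInfo {
  unsigned VarArgsFrameIndex = 0; // example of target-specific state
  explicit TargetFooFunctionInfo(llvm::MachineFunction &MF) {}
};

// getInfo<Ty>() lazily constructs the object through
// MachineFunctionInfo::create<Ty>, using the function's allocator.
unsigned getVarArgsFI(llvm::MachineFunction &MF) {
  return MF.getInfo<TargetFooFunctionInfo>()->VarArgsFrameIndex;
}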
@@ -609,7 +622,7 @@ public: //===--------------------------------------------------------------------===// // Internal functions used to automatically number MachineBasicBlocks - /// \brief Adds the MBB to the internal numbering. Returns the unique number + /// Adds the MBB to the internal numbering. Returns the unique number /// assigned to the MBB. unsigned addToMBBNumbering(MachineBasicBlock *MBB) { MBBNumbering.push_back(MBB); @@ -695,14 +708,8 @@ public: OperandRecycler.deallocate(Cap, Array); } - /// \brief Allocate and initialize a register mask with @p NumRegister bits. - uint32_t *allocateRegisterMask(unsigned NumRegister) { - unsigned Size = (NumRegister + 31) / 32; - uint32_t *Mask = Allocator.Allocate<uint32_t>(Size); - for (unsigned i = 0; i != Size; ++i) - Mask[i] = 0; - return Mask; - } + /// Allocate and initialize a register mask with @p NumRegister bits. + uint32_t *allocateRegMask(); /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand /// pointers. This array is owned by the MachineFunction. @@ -759,6 +766,9 @@ public: bool callsUnwindInit() const { return CallsUnwindInit; } void setCallsUnwindInit(bool b) { CallsUnwindInit = b; } + bool hasEHScopes() const { return HasEHScopes; } + void setHasEHScopes(bool V) { HasEHScopes = V; } + bool hasEHFunclets() const { return HasEHFunclets; } void setHasEHFunclets(bool V) { HasEHFunclets = V; } @@ -793,7 +803,7 @@ public: void addCleanup(MachineBasicBlock *LandingPad); void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter, - const BlockAddress *RecoverLabel); + const BlockAddress *RecoverBA); void addSEHCleanupHandler(MachineBasicBlock *LandingPad, const Function *Cleanup); @@ -860,7 +870,7 @@ public: /// Collect information used to emit debugging information of a variable. void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, - unsigned Slot, const DILocation *Loc) { + int Slot, const DILocation *Loc) { VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc); } diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 3c1c1bb14f42..88e13cdf4138 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -80,7 +80,21 @@ public: FrameDestroy = 1 << 1, // Instruction is used as a part of // function frame destruction code. BundledPred = 1 << 2, // Instruction has bundled predecessors. - BundledSucc = 1 << 3 // Instruction has bundled successors. + BundledSucc = 1 << 3, // Instruction has bundled successors. + FmNoNans = 1 << 4, // Instruction does not support Fast + // math NaN values. + FmNoInfs = 1 << 5, // Instruction does not support Fast + // math infinity values. + FmNsz = 1 << 6, // Instruction is not required to retain + // signed zero values. + FmArcp = 1 << 7, // Instruction supports Fast math + // reciprocal approximations. + FmContract = 1 << 8, // Instruction supports Fast math + // contraction operations like fma. + FmAfn = 1 << 9, // Instruction may map to Fast math + // intrinsic approximation. + FmReassoc = 1 << 10 // Instruction supports Fast math + // reassociation of operand order. }; private: @@ -93,7 +107,7 @@ private: using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity; OperandCapacity CapOperands; // Capacity of the Operands array. - uint8_t Flags = 0; // Various bits of additional + uint16_t Flags = 0; // Various bits of additional // information about machine // instruction. 
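With Flags widened to 16 bits, the seven new fast-math flags can be set and queried like any other MIFlag. A short sketch, assuming MI is an existing floating-point MachineInstr; markFastMath is an illustrative name:

// Sketch: record and query fast-math properties on a MachineInstr.
void markFastMath(llvm::MachineInstr &MI) {
  MI.setFlag(llvm::MachineInstr::FmContract); // allow fma-style contraction
  MI.setFlag(llvm::MachineInstr::FmReassoc);  // allow reassociation
  // Queries use the same flag set; e.g. a combine may check FmNoNans
  // before assuming NaNs cannot occur for this instruction.
  bool NoNaNs = MI.getFlag(llvm::MachineInstr::FmNoNans);
  (void)NoNaNs;
}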
@@ -127,7 +141,7 @@ private: /// This constructor creates a MachineInstr and adds the implicit operands. /// It reserves space for the number of operands specified by /// MCInstrDesc. An explicit DebugLoc is supplied. - MachineInstr(MachineFunction &, const MCInstrDesc &MCID, DebugLoc dl, + MachineInstr(MachineFunction &, const MCInstrDesc &tid, DebugLoc dl, bool NoImp = false); // MachineInstrs are pool-allocated and owned by MachineFunction. @@ -175,7 +189,7 @@ public: } /// Return the MI flags bitvector. - uint8_t getFlags() const { + uint16_t getFlags() const { return Flags; } @@ -186,7 +200,7 @@ public: /// Set a MI flag. void setFlag(MIFlag Flag) { - Flags |= (uint8_t)Flag; + Flags |= (uint16_t)Flag; } void setFlags(unsigned flags) { @@ -197,7 +211,7 @@ public: /// clearFlag - Clear a MI flag. void clearFlag(MIFlag Flag) { - Flags &= ~((uint8_t)Flag); + Flags &= ~((uint16_t)Flag); } /// Return true if MI is in a bundle (but not the first MI in a bundle). @@ -278,6 +292,10 @@ public: /// this DBG_VALUE instruction. const DIExpression *getDebugExpression() const; + /// Return the debug label referenced by + /// this DBG_LABEL instruction. + const DILabel *getDebugLabel() const; + /// Emit an error referring to the source location of this instruction. /// This should only be used for inline assembly that is somehow /// impossible to compile. Other errors should have been handled much @@ -304,6 +322,11 @@ public: return Operands[i]; } + /// Returns the total number of definitions. + unsigned getNumDefs() const { + return getNumExplicitDefs() + MCID->getNumImplicitDefs(); + } + /// Return true if operand \p OpIdx is a subregister index. bool isOperandSubregIdx(unsigned OpIdx) const { assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && @@ -322,6 +345,9 @@ public: /// Returns the number of non-implicit operands. unsigned getNumExplicitOperands() const; + /// Returns the number of non-implicit definitions. + unsigned getNumExplicitDefs() const; + /// iterator/begin/end - Iterate over all operands of a machine instruction. using mop_iterator = MachineOperand *; using const_mop_iterator = const MachineOperand *; @@ -356,31 +382,29 @@ public: /// Implicit definitions are not included! iterator_range<mop_iterator> defs() { return make_range(operands_begin(), - operands_begin() + getDesc().getNumDefs()); + operands_begin() + getNumExplicitDefs()); } /// \copydoc defs() iterator_range<const_mop_iterator> defs() const { return make_range(operands_begin(), - operands_begin() + getDesc().getNumDefs()); + operands_begin() + getNumExplicitDefs()); } /// Returns a range that includes all operands that are register uses. /// This may include unrelated operands which are not register uses. 
iterator_range<mop_iterator> uses() { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_end()); + return make_range(operands_begin() + getNumExplicitDefs(), operands_end()); } /// \copydoc uses() iterator_range<const_mop_iterator> uses() const { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_end()); + return make_range(operands_begin() + getNumExplicitDefs(), operands_end()); } iterator_range<mop_iterator> explicit_uses() { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_begin() + getNumExplicitOperands() ); + return make_range(operands_begin() + getNumExplicitDefs(), + operands_begin() + getNumExplicitOperands()); } iterator_range<const_mop_iterator> explicit_uses() const { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_begin() + getNumExplicitOperands() ); + return make_range(operands_begin() + getNumExplicitDefs(), + operands_begin() + getNumExplicitOperands()); } /// Returns the number of the operand iterator \p I points to. @@ -391,7 +415,7 @@ public: /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } - /// Return true if we don't have any memory operands which described the the + /// Return true if we don't have any memory operands which describe the /// memory access done by this instruction. If this is true, calling code /// must be conservative. bool memoperands_empty() const { return NumMemRefs == 0; } @@ -529,6 +553,12 @@ public: return hasProperty(MCID::MoveImm, Type); } + /// Return true if this instruction is a register move. + /// (including moving values from subreg to reg) + bool isMoveReg(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::MoveReg, Type); + } + /// Return true if this instruction is a bitcast instruction. bool isBitcast(QueryType Type = IgnoreBundle) const { return hasProperty(MCID::Bitcast, Type); @@ -576,7 +606,7 @@ public: return hasProperty(MCID::FoldableAsLoad, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic REG_SEQUENCE instructions. /// E.g., on ARM, /// dX VMOVDRR rY, rZ @@ -590,7 +620,7 @@ return hasProperty(MCID::RegSequence, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic EXTRACT_SUBREG instructions. /// E.g., on ARM, /// rX, rY VMOVRRD dZ @@ -605,7 +635,7 @@ return hasProperty(MCID::ExtractSubreg, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic INSERT_SUBREG instructions. /// E.g., on ARM, /// dX = VSETLNi32 dY, rZ, Imm @@ -817,6 +847,8 @@ public: bool isPosition() const { return isLabel() || isCFIInstruction(); } bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } + bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; } + bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); } /// A DBG_VALUE is indirect iff the first operand is a register and /// the second operand is an immediate. 
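Since defs()/uses() are now delimited by getNumExplicitDefs() and isDebugInstr() covers both DBG_VALUE and DBG_LABEL, a typical operand walk looks like the following sketch; visitDefs is an illustrative name, not part of this patch:

// Sketch: visit the explicit defs of every non-debug instruction in a block.
void visitDefs(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI : MBB) {
    if (MI.isDebugInstr()) // skips DBG_VALUE and DBG_LABEL alike
      continue;
    for (llvm::MachineOperand &MO : MI.defs())
      if (MO.isReg())
        (void)MO.getReg(); // inspect the defined register here
  }
}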
@@ -893,6 +925,9 @@ public: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::DBG_VALUE: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::LIFETIME_START: + case TargetOpcode::LIFETIME_END: return true; } } @@ -1047,7 +1082,7 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; - /// \brief Applies the constraints (def/use) implied by this MI on \p Reg to + /// Applies the constraints (def/use) implied by this MI on \p Reg to /// the given \p CurRC. /// If \p ExploreBundle is set and MI is part of a bundle, all the /// instructions inside the bundle will be taken into account. In other words, @@ -1064,7 +1099,7 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle = false) const; - /// \brief Applies the constraints (def/use) implied by the \p OpIdx operand + /// Applies the constraints (def/use) implied by the \p OpIdx operand /// to the given \p CurRC. /// /// Returns the register class that satisfies both \p CurRC and the @@ -1233,15 +1268,20 @@ public: bool hasComplexRegisterTies() const; /// Print this MI to \p OS. + /// Don't print information that can be inferred from other instructions if + /// \p IsStandalone is false. It is usually true when only a fragment of the + /// function is printed. /// If \p SkipOpers is true, only print the defs and the opcode. /// Otherwise, if \p SkipDebugLoc is true, also print the operands. /// Otherwise, also print the debug loc, with a terminating newline. /// \p TII is used to print the opcode name. If it's not present, but the /// MI is in a function, the opcode will be printed using the function's TII. - void print(raw_ostream &OS, bool SkipOpers = false, bool SkipDebugLoc = false, + void print(raw_ostream &OS, bool IsStandalone = true, bool SkipOpers = false, + bool SkipDebugLoc = false, bool AddNewLine = true, const TargetInstrInfo *TII = nullptr) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST, bool SkipOpers = false, - bool SkipDebugLoc = false, + void print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsStandalone = true, + bool SkipOpers = false, bool SkipDebugLoc = false, + bool AddNewLine = true, const TargetInstrInfo *TII = nullptr) const; void dump() const; /// @} @@ -1281,7 +1321,7 @@ public: /// Erase an operand from an instruction, leaving it with one /// fewer operand than it started with. - void RemoveOperand(unsigned i); + void RemoveOperand(unsigned OpNo); /// Add a MachineMemOperand to the machine instruction. /// This function should be used only occasionally. The setMemRefs function @@ -1311,6 +1351,11 @@ public: /// modify the memrefs of this MachineInstr. std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other); + /// Return the MIFlags which represent both MachineInstrs. This + /// should be used when merging two MachineInstrs into one. This routine does + /// not modify the MIFlags of this MachineInstr. + uint16_t mergeFlagsWith(const MachineInstr& Other) const; + /// Clear this MachineInstr's memory reference descriptor list. This resets /// the memrefs to their most conservative state. This should be used only /// as a last resort since it greatly pessimizes our knowledge of the memory @@ -1351,7 +1396,7 @@ private: /// Slow path for hasProperty when we're dealing with a bundle. 
bool hasPropertyInBundle(unsigned Mask, QueryType Type) const; - /// \brief Implements the logic of getRegClassConstraintEffectForVReg for the + /// Implements the logic of getRegClassConstraintEffectForVReg for /// this MI and the given operand index \p OpIdx. /// If the related operand does not constrain Reg, this returns CurRC. const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index e4f3976ec950..665608755741 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -20,6 +20,7 @@ #define LLVM_CODEGEN_MACHINEINSTRBUILDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -219,6 +220,9 @@ public: assert((MI->isDebugValue() ? static_cast<bool>(MI->getDebugVariable()) : true) && "first MDNode argument of a DBG_VALUE not a variable"); + assert((MI->isDebugLabel() ? static_cast<bool>(MI->getDebugLabel()) + : true) && + "first MDNode argument of a DBG_LABEL not a label"); return *this; } @@ -283,6 +287,12 @@ public: MI->copyImplicitOps(*MF, OtherMI); return *this; } + + bool constrainAllUses(const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + } }; /// Builder interface. Specify how to create the initial instruction itself. @@ -408,6 +418,13 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic +/// for a MachineOperand. +MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + MachineOperand &MO, const MDNode *Variable, + const MDNode *Expr); + +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect /// address and inserts it at position I. MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -416,6 +433,14 @@ MachineInstrBuilder BuildMI(MachineBasicBlock &BB, unsigned Reg, const MDNode *Variable, const MDNode *Expr); +/// This version of the builder builds a DBG_VALUE intrinsic +/// for a machine operand and inserts it at position I. +MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + MachineOperand &MO, const MDNode *Variable, + const MDNode *Expr); + /// Clone a DBG_VALUE whose value has been spilled to FrameIndex. MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 104655e45524..917fb90380f5 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -54,7 +54,7 @@ public: /// that contains the header. MachineBasicBlock *getBottomBlock(); - /// \brief Find the block that contains the loop control variable and the + /// Find the block that contains the loop control variable and the /// loop test. This will return the latch block if it's one of the exiting /// blocks. Otherwise, return the exiting block. Return 'null' when /// multiple exiting blocks are present. 
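The constrainAllUses helper added to MachineInstrBuilder above simply forwards to GlobalISel's constrainSelectedInstRegOperands. A hedged sketch of how an instruction selector might use it; Opcode, DstReg, SrcReg, and emitAndConstrain stand in for target-specific values and are not part of this patch:

// Sketch: build one selected instruction, then constrain its operands.
bool emitAndConstrain(llvm::MachineBasicBlock &MBB,
                      llvm::MachineBasicBlock::iterator InsertPt,
                      const llvm::DebugLoc &DL, unsigned Opcode,
                      unsigned DstReg, unsigned SrcReg,
                      const llvm::TargetInstrInfo &TII,
                      const llvm::TargetRegisterInfo &TRI,
                      const llvm::RegisterBankInfo &RBI) {
  llvm::MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, TII.get(Opcode), DstReg).addReg(SrcReg);
  // Returns false if an operand cannot be constrained to a register class.
  return MIB.constrainAllUses(TII, TRI, RBI);
}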
@@ -97,7 +97,7 @@ public: LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; } - /// \brief Find the block that either is the loop preheader, or could + /// Find the block that either is the loop preheader, or could /// speculatively be used as the preheader. This is e.g. useful to place /// loop setup code. Code that cannot be speculated should not be placed /// here. SpeculativePreheader controls whether it also tries to diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index c5b204a79f04..078ef7ca510c 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -184,7 +184,7 @@ public: /// atomic operations the atomic ordering requirements when store does not /// occur must also be specified. MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, uint64_t s, - unsigned base_alignment, + uint64_t a, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System, @@ -295,6 +295,9 @@ public: /// @{ void print(raw_ostream &OS) const; void print(raw_ostream &OS, ModuleSlotTracker &MST) const; + void print(raw_ostream &OS, ModuleSlotTracker &MST, + SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context, + const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; /// @} friend bool operator==(const MachineMemOperand &LHS, diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 4be7942c2c64..53e8889d118a 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -74,7 +74,7 @@ public: private: /// OpKind - Specify what kind of operand this is. This discriminates the /// union. - MachineOperandType OpKind : 8; + unsigned OpKind : 8; /// Subregister number for MO_Register. A value of 0 indicates the /// MO_Register has no subReg. @@ -85,17 +85,17 @@ private: /// TiedTo - Non-zero when this register operand is tied to another register /// operand. The encoding of this field is described in the block comment /// before MachineInstr::tieOperands(). - unsigned char TiedTo : 4; + unsigned TiedTo : 4; /// IsDef - True if this is a def, false if this is a use of the register. /// This is only valid on register operands. /// - bool IsDef : 1; + unsigned IsDef : 1; /// IsImp - True if this is an implicit def or use, false if it is explicit. /// This is only valid on register operands. /// - bool IsImp : 1; + unsigned IsImp : 1; /// IsDeadOrKill /// For uses: IsKill - True if this instruction is the last use of the /// For defs: IsDead - True if this register is never used by a subsequent /// instruction. /// This is only valid on register operands. - bool IsDeadOrKill : 1; + unsigned IsDeadOrKill : 1; - /// IsRenamable - True if this register may be renamed, i.e. it does not - /// generate a value that is somehow read in a way that is not represented by - /// the Machine IR (e.g. to meet an ABI or ISA requirement). This is only - /// valid on physical register operands. Virtual registers are assumed to - /// always be renamable regardless of the value of this field. - bool IsRenamable : 1; + /// See isRenamable(). + unsigned IsRenamable : 1; /// IsUndef - True if this register operand reads an "undef" value, i.e. the /// read value doesn't matter. 
This flag can be set on both use and def @@ -129,7 +125,7 @@ private: /// Any register can be used for %2, and its value doesn't matter, but /// the two operands must be the same register. /// - bool IsUndef : 1; + unsigned IsUndef : 1; /// IsInternalRead - True if this operand reads a value that was defined /// inside the same instruction or bundle. This flag can be set on both use /// and def operands. /// /// When this flag is set, the instruction bundle must contain at least one /// other def of the register. If multiple instructions in the bundle define /// the register, the meaning is target-defined. - bool IsInternalRead : 1; + unsigned IsInternalRead : 1; /// IsEarlyClobber - True if this MO_Register 'def' operand is written to /// by the MachineInstr before all input registers are read. This is used to /// model the GCC inline asm '&' constraint modifier. - bool IsEarlyClobber : 1; + unsigned IsEarlyClobber : 1; /// IsDebug - True if this MO_Register 'use' operand is in a debug pseudo, /// not a real instruction. Such uses should be ignored during codegen. - bool IsDebug : 1; + unsigned IsDebug : 1; /// SmallContents - This really should be part of the Contents union, but /// lives out here so we can get a better packed struct. @@ -198,7 +194,19 @@ private: } Contents; explicit MachineOperand(MachineOperandType K) - : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {} + : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) { + // Assert that the layout is what we expect. It's easy to grow this object. + static_assert(alignof(MachineOperand) <= alignof(int64_t), + "MachineOperand shouldn't be more than 8 byte aligned"); + static_assert(sizeof(Contents) <= 2 * sizeof(void *), + "Contents should be at most two pointers"); + static_assert(sizeof(MachineOperand) <= + alignTo<alignof(int64_t)>(2 * sizeof(unsigned) + + 3 * sizeof(void *)), + "MachineOperand too big. Should be Kind, SmallContents, " + "ParentMI, and Contents"); + } + public: /// getType - Returns the MachineOperandType for this operand. /// @@ -238,7 +246,7 @@ public: /// MO_Immediate operands can also be subreg indices. If that is the case, the /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be /// called to check this. - static void printSubregIdx(raw_ostream &OS, uint64_t Index, + static void printSubRegIdx(raw_ostream &OS, uint64_t Index, const TargetRegisterInfo *TRI); /// Print operand target flags. @@ -270,6 +278,9 @@ public: /// \param PrintDef - whether we want to print `def` on an operand which /// isDef. Sometimes, if the operand is printed before '=', we don't print /// `def`. + /// \param IsStandalone - whether we want a verbose output of the MO. This + /// prints extra information that can be easily inferred when printing the + /// whole function, but not when printing only a fragment of it. /// \param ShouldPrintRegisterTies - whether we want to print register ties. /// Sometimes they are easily determined by the instruction's descriptor /// (MachineInstr::hasComplexRegisterTies can determine if it's needed). /// \param TiedOperandIdx - if we need to print register ties this needs to /// provide the index of the tied register. If not, it will be ignored. /// \param TRI - provide more target-specific information to the printer. /// Unlike the previous function, this one will not try to get the /// information from its parent. /// \param IntrinsicInfo - same as \p TRI. 
void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, - bool PrintDef, bool ShouldPrintRegisterTies, + bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const; + /// Same as print(os, TRI, IntrinsicInfo), but allows specifying the low-level + /// type to be printed the same way the full version of print(...) does it. + void print(raw_ostream &os, LLT TypeToPrint, + const TargetRegisterInfo *TRI = nullptr, + const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; + void dump() const; //===--------------------------------------------------------------------===// @@ -369,6 +386,35 @@ public: return IsUndef; } + /// isRenamable - Returns true if this register may be renamed, i.e. it does + /// not generate a value that is somehow read in a way that is not represented + /// by the Machine IR (e.g. to meet an ABI or ISA requirement). This is only + /// valid on physical register operands. Virtual registers are assumed to + /// always be renamable regardless of the value of this field. + /// + /// Operands that are renamable can freely be changed to any other register + /// that is a member of the register class returned by + /// MI->getRegClassConstraint(). + /// + /// isRenamable can return false for several different reasons: + /// + /// - ABI constraints (since liveness is not always precisely modeled). We + /// conservatively handle these cases by setting all physical register + /// operands that didn’t start out as virtual regs to not be renamable. + /// Also any physical register operands created after register allocation or + /// whose register is changed after register allocation will not be + /// renamable. This state is tracked in the MachineOperand::IsRenamable + /// bit. + /// + /// - Opcode/target constraints: for opcodes that have complex register class + /// requirements (e.g. that depend on other operands/instructions), we set + /// hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq in the machine opcode + /// description. Operands belonging to instructions with opcodes that are + /// marked hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq return false from + /// isRenamable(). Additionally, the AllowRegisterRenaming target property + /// prevents any operands from being marked renamable for targets that don't + /// have detailed opcode hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq + /// values. bool isRenamable() const; bool isInternalRead() const { @@ -458,10 +504,6 @@ public: void setIsRenamable(bool Val = true); - /// Set IsRenamable to true if there are no extra register allocation - /// requirements placed on this operand by the parent instruction's opcode. - void setIsRenamableIfNoExtraRegAllocReq(); - void setIsInternalRead(bool Val = true) { assert(isReg() && "Wrong MachineOperand mutator"); IsInternalRead = Val; @@ -574,6 +616,11 @@ public: return Contents.RegMask; } + /// Returns the number of elements needed for a regmask array. + static unsigned getRegMaskSize(unsigned NumRegs) { + return (NumRegs + 31) / 32; + } + /// getRegLiveOut - Returns a bit mask of live-out registers. 
const uint32_t *getRegLiveOut() const { assert(isRegLiveOut() && "Wrong MachineOperand accessor"); @@ -594,6 +641,11 @@ public: Contents.ImmVal = immVal; } + void setCImm(const ConstantInt *CI) { + assert(isCImm() && "Wrong MachineOperand mutator"); + Contents.CI = CI; + } + void setFPImm(const ConstantFP *CFP) { assert(isFPImm() && "Wrong MachineOperand mutator"); Contents.CFP = CFP; @@ -641,7 +693,7 @@ public: /// should stay in sync with the hash_value overload below. bool isIdenticalTo(const MachineOperand &Other) const; - /// \brief MachineOperand hash_value overload. + /// MachineOperand hash_value overload. /// /// Note that this includes the same information in the hash that /// isIdenticalTo uses for comparison. It is thus suited for use in hash diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 2fdefbed37ce..a7ce870400c2 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -24,7 +24,7 @@ class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineInstr; -/// \brief Common features for diagnostics dealing with optimization remarks +/// Common features for diagnostics dealing with optimization remarks /// that are used by machine passes. class DiagnosticInfoMIROptimization : public DiagnosticInfoOptimizationBase { public: @@ -151,7 +151,7 @@ public: /// Emit an optimization remark. void emit(DiagnosticInfoOptimizationBase &OptDiag); - /// \brief Whether we allow for extra compile-time budget to perform more + /// Whether we allow for extra compile-time budget to perform more /// analysis to be more informative. /// /// This is useful to enable additional missed optimizations to be reported @@ -164,7 +164,7 @@ public: .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); } - /// \brief Take a lambda that returns a remark which will be emitted. Second + /// Take a lambda that returns a remark which will be emitted. Second /// argument is only used to restrict this to functions. template <typename T> void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { @@ -192,7 +192,7 @@ private: /// Similar but use value from \p OptDiag and update hotness there. void computeHotness(DiagnosticInfoMIROptimization &Remark); - /// \brief Only allow verbose messages if we know we're filtering by hotness + /// Only allow verbose messages if we know we're filtering by hotness /// (BFI is only set in this case). bool shouldEmitVerbose() { return MBFI != nullptr; } }; diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h new file mode 100644 index 000000000000..4249a99a891b --- /dev/null +++ b/include/llvm/CodeGen/MachineOutliner.h @@ -0,0 +1,226 @@ +//===---- MachineOutliner.h - Outliner data structures ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains all data structures shared between the outliner implemented in +/// MachineOutliner.cpp and target implementations of the outliner. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MACHINEOUTLINER_H +#define LLVM_MACHINEOUTLINER_H + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +namespace llvm { +namespace outliner { + +/// Represents how an instruction should be mapped by the outliner. +/// \p Legal instructions are those which are safe to outline. +/// \p LegalTerminator instructions are safe to outline, but only as the +/// last instruction in a sequence. +/// \p Illegal instructions are those which cannot be outlined. +/// \p Invisible instructions are instructions which can be outlined, but +/// shouldn't actually impact the outlining result. +enum InstrType { Legal, LegalTerminator, Illegal, Invisible }; + +/// An individual sequence of instructions to be replaced with a call to +/// an outlined function. +struct Candidate { +private: + /// The start index of this \p Candidate in the instruction list. + unsigned StartIdx; + + /// The number of instructions in this \p Candidate. + unsigned Len; + + /// The first instruction in this \p Candidate. + MachineBasicBlock::iterator FirstInst; + + /// The last instruction in this \p Candidate. + MachineBasicBlock::iterator LastInst; + + /// The basic block that contains this Candidate. + MachineBasicBlock *MBB; + + /// Cost of calling an outlined function from this point as defined by the + /// target. + unsigned CallOverhead; + +public: + /// The index of this \p Candidate's \p OutlinedFunction in the list of + /// \p OutlinedFunctions. + unsigned FunctionIdx; + + /// Set to false if the candidate overlapped with another candidate. + bool InCandidateList = true; + + /// Identifier denoting the instructions to emit to call an outlined function + /// from this point. Defined by the target. + unsigned CallConstructionID; + + /// Contains physical register liveness information for the MBB containing + /// this \p Candidate. + /// + /// This is optionally used by the target to calculate more fine-grained + /// cost model information. + LiveRegUnits LRU; + + /// Return the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// Return the start index of this candidate. + unsigned getStartIdx() const { return StartIdx; } + + /// Return the end index of this candidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } + + /// Set the CallConstructionID and CallOverhead of this candidate to CID and + /// CO respectively. + void setCallInfo(unsigned CID, unsigned CO) { + CallConstructionID = CID; + CallOverhead = CO; + } + + /// Returns the call overhead of this candidate if it is in the list. + unsigned getCallOverhead() const { + return InCandidateList ? CallOverhead : 0; + } + + MachineBasicBlock::iterator &front() { return FirstInst; } + MachineBasicBlock::iterator &back() { return LastInst; } + MachineFunction *getMF() const { return MBB->getParent(); } + MachineBasicBlock *getMBB() const { return MBB; } + + /// The number of bytes that would be saved by outlining every + /// candidate of this type. + /// + /// This is a fixed value which is not updated during the candidate pruning + /// process. It is only used for deciding which candidate to keep if two + /// candidates overlap. The true benefit is stored in the OutlinedFunction + /// for some given candidate. 
+ unsigned Benefit = 0; + + Candidate(unsigned StartIdx, unsigned Len, + MachineBasicBlock::iterator &FirstInst, + MachineBasicBlock::iterator &LastInst, MachineBasicBlock *MBB, + unsigned FunctionIdx) + : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), + MBB(MBB), FunctionIdx(FunctionIdx) {} + Candidate() {} + + /// Used to ensure that \p Candidates are outlined in an order that + /// preserves the start and end indices of other \p Candidates. + bool operator<(const Candidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } + + /// Compute the registers that are live across this Candidate. + /// Used by targets that need this information for cost model calculation. + /// If a target does not need this information, then this should not be + /// called. + void initLRU(const TargetRegisterInfo &TRI) { + assert(MBB->getParent()->getRegInfo().tracksLiveness() && + "Candidate's Machine Function must track liveness"); + LRU.init(TRI); + LRU.addLiveOuts(*MBB); + + // Compute liveness from the end of the block up to the beginning of the + // outlining candidate. + std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(), + [this](MachineInstr &MI) { LRU.stepBackward(MI); }); + } +}; + +/// The information necessary to create an outlined function for some +/// class of candidate. +struct OutlinedFunction { + +private: + /// The number of candidates for this \p OutlinedFunction. + unsigned OccurrenceCount = 0; + +public: + std::vector<std::shared_ptr<Candidate>> Candidates; + + /// The actual outlined function created. + /// This is initialized after we go through and create the actual function. + MachineFunction *MF = nullptr; + + /// A number assigned to this function which appears at the end of its name. + unsigned Name; + + /// The sequence of integers corresponding to the instructions in this + /// function. + std::vector<unsigned> Sequence; + + /// Represents the size of a sequence in bytes. (Some instructions vary + /// widely in size, so just counting the instructions isn't very useful.) + unsigned SequenceSize; + + /// Target-defined overhead of constructing a frame for this function. + unsigned FrameOverhead; + + /// Target-defined identifier for constructing a frame for this function. + unsigned FrameConstructionID; + + /// Return the number of candidates for this \p OutlinedFunction. + unsigned getOccurrenceCount() { return OccurrenceCount; } + + /// Decrement the occurrence count of this OutlinedFunction and return the + /// new count. + unsigned decrement() { + assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); + OccurrenceCount--; + return getOccurrenceCount(); + } + + /// Return the number of bytes it would take to outline this + /// function. + unsigned getOutliningCost() { + unsigned CallOverhead = 0; + for (std::shared_ptr<Candidate> &C : Candidates) + CallOverhead += C->getCallOverhead(); + return CallOverhead + SequenceSize + FrameOverhead; + } + + /// Return the size in bytes of the unoutlined sequences. + unsigned getNotOutlinedCost() { return OccurrenceCount * SequenceSize; } + + /// Return the number of instructions that would be saved by outlining + /// this function. + unsigned getBenefit() { + unsigned NotOutlinedCost = getNotOutlinedCost(); + unsigned OutlinedCost = getOutliningCost(); + return (NotOutlinedCost < OutlinedCost) ? 
0
+                                            : NotOutlinedCost - OutlinedCost;
+  }
+
+  OutlinedFunction(std::vector<Candidate> &Cands,
+                   unsigned SequenceSize, unsigned FrameOverhead,
+                   unsigned FrameConstructionID)
+      : SequenceSize(SequenceSize), FrameOverhead(FrameOverhead),
+        FrameConstructionID(FrameConstructionID) {
+    OccurrenceCount = Cands.size();
+    for (Candidate &C : Cands)
+      Candidates.push_back(std::make_shared<outliner::Candidate>(C));
+
+    unsigned B = getBenefit();
+    for (std::shared_ptr<Candidate> &C : Candidates)
+      C->Benefit = B;
+  }
+
+  OutlinedFunction() {}
+};
+} // namespace outliner
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 3be94f802170..5bf4a49c8b3b 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -75,6 +76,13 @@ private:
              VirtReg2IndexFunctor> VRegInfo;
+  /// Map for recovering vreg name from vreg number.
+  /// This map is used by the MIR Printer.
+  IndexedMap<std::string, VirtReg2IndexFunctor> VReg2Name;
+
+  /// StringSet that is used to unique vreg names.
+  StringSet<> VRegNames;
+
   /// The flag is true upon \p UpdatedCSRs initialization
   /// and false otherwise.
   bool IsUpdatedCSRsInitialized;
@@ -128,9 +136,9 @@ private:
   /// started.
   BitVector ReservedRegs;
-  using VRegToTypeMap = DenseMap<unsigned, LLT>;
-  /// Map generic virtual registers to their actual size.
-  mutable std::unique_ptr<VRegToTypeMap> VRegToType;
+  using VRegToTypeMap = IndexedMap<LLT, VirtReg2IndexFunctor>;
+  /// Map generic virtual registers to their low-level type.
+  VRegToTypeMap VRegToType;
   /// Keep track of the physical registers that are live in to the function.
   /// Live in values are typically arguments in registers. LiveIn values are
@@ -418,6 +426,20 @@ public:
   /// specified register (it may be live-in).
   bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); }
+  StringRef getVRegName(unsigned Reg) const {
+    return VReg2Name.inBounds(Reg) ? StringRef(VReg2Name[Reg]) : "";
+  }
+
+  void insertVRegByName(StringRef Name, unsigned Reg) {
+    assert((Name.empty() || VRegNames.find(Name) == VRegNames.end()) &&
+           "Named VRegs Must be Unique.");
+    if (!Name.empty()) {
+      VRegNames.insert(Name);
+      VReg2Name.grow(Reg);
+      VReg2Name[Reg] = Name.str();
+    }
+  }
+
   /// Return true if there is exactly one operand defining the specified
   /// register.
   bool hasOneDef(unsigned RegNo) const {
@@ -548,12 +570,16 @@ public:
   /// except that it also changes any definitions of the register as well.
   ///
   /// Note that it is usually necessary to first constrain ToReg's register
-  /// class to match the FromReg constraints using:
+  /// class and register bank to match the FromReg constraints using one of the
+  /// methods:
   ///
   ///   constrainRegClass(ToReg, getRegClass(FromReg))
+  ///   constrainRegAttrs(ToReg, FromReg)
+  ///   RegisterBankInfo::constrainGenericRegister(ToReg,
+  ///       *MRI.getRegClass(FromReg), MRI)
   ///
-  /// That function will return NULL if the virtual registers have incompatible
-  /// constraints.
+  /// These functions will return a null pointer (or false, in the case of
+  /// constrainRegAttrs) if the virtual registers have incompatible
+  /// constraints.
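+  ///
+  /// A hypothetical use, as a sketch only (MRI here names a
+  /// MachineRegisterInfo instance; this is not code from this header):
+  ///
+  /// \code
+  ///   if (MRI.constrainRegAttrs(ToReg, FromReg))
+  ///     MRI.replaceRegWith(FromReg, ToReg);
+  /// \endcode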
/// /// Note that if ToReg is a physical register the function will replace and /// apply sub registers to ToReg in order to obtain a final/proper physical @@ -653,10 +679,30 @@ public: /// new register class, or NULL if no such class exists. /// This should only be used when the constraint is known to be trivial, like /// GR32 -> GR32_NOSP. Beware of increasing register pressure. + /// + /// \note Assumes that the register has a register class assigned. + /// Use RegisterBankInfo::constrainGenericRegister in GlobalISel's + /// InstructionSelect pass and constrainRegAttrs in every other pass, + /// including non-select passes of GlobalISel, instead. const TargetRegisterClass *constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs = 0); + /// Constrain the register class or the register bank of the virtual register + /// \p Reg to be a common subclass and a common bank of both registers + /// provided respectively. Do nothing if any of the attributes (classes, + /// banks, or low-level types) of the registers are deemed incompatible, or if + /// the resulting register will have a class smaller than before and of size + /// less than \p MinNumRegs. Return true if such register attributes exist, + /// false otherwise. + /// + /// \note Assumes that each register has either a low-level type or a class + /// assigned, but not both. Use this method instead of constrainRegClass and + /// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG + /// ISel / FastISel and GlobalISel's InstructionSelect pass respectively. + bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, + unsigned MinNumRegs = 0); + /// recomputeRegClass - Try to find a legal super-class of Reg's register /// class that still satisfies the constraints from the instructions using /// Reg. Returns true if Reg was upgraded. @@ -668,26 +714,23 @@ public: /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. - unsigned createVirtualRegister(const TargetRegisterClass *RegClass); + unsigned createVirtualRegister(const TargetRegisterClass *RegClass, + StringRef Name = ""); - /// Accessor for VRegToType. This accessor should only be used - /// by global-isel related work. - VRegToTypeMap &getVRegToType() const { - if (!VRegToType) - VRegToType.reset(new VRegToTypeMap); - return *VRegToType.get(); - } - - /// Get the low-level type of \p VReg or LLT{} if VReg is not a generic + /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. - LLT getType(unsigned VReg) const; + LLT getType(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + return VRegToType[Reg]; + return LLT{}; + } /// Set the low-level type of \p VReg to \p Ty. void setType(unsigned VReg, LLT Ty); /// Create and return a new generic virtual register with low-level /// type \p Ty. - unsigned createGenericVirtualRegister(LLT Ty); + unsigned createGenericVirtualRegister(LLT Ty, StringRef Name = ""); /// Remove all types associated to virtual registers (after instruction /// selection and constraining of all generic virtual registers). @@ -698,7 +741,7 @@ public: /// temporarily while constructing machine instructions. Most operations are /// undefined on an incomplete register until one of setRegClass(), /// setRegBank() or setSize() has been called on it. 
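  ///
  /// A minimal sketch of the intended flow (illustrative only; "tmp" is a
  /// made-up name, and the register must still be completed, e.g. via
  /// setType(), before it is used):
  ///
  /// \code
  ///   unsigned Reg = MRI.createIncompleteVirtualRegister("tmp");
  ///   MRI.setType(Reg, LLT::scalar(32));
  /// \endcode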
- unsigned createIncompleteVirtualRegister(); + unsigned createIncompleteVirtualRegister(StringRef Name = ""); /// getNumVirtRegs - Return the number of virtual registers created. unsigned getNumVirtRegs() const { return VRegInfo.size(); } diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h index b5ea2080444d..5e91246b402c 100644 --- a/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/include/llvm/CodeGen/MachineSSAUpdater.h @@ -56,7 +56,7 @@ public: /// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be /// filled in with all PHI Nodes created by rewriting. explicit MachineSSAUpdater(MachineFunction &MF, - SmallVectorImpl<MachineInstr*> *InsertedPHIs = nullptr); + SmallVectorImpl<MachineInstr*> *NewPHI = nullptr); MachineSSAUpdater(const MachineSSAUpdater &) = delete; MachineSSAUpdater &operator=(const MachineSSAUpdater &) = delete; ~MachineSSAUpdater(); diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index e327881de13a..85ffa4eda2b8 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -237,7 +237,7 @@ public: /// be scheduled at the bottom. virtual SUnit *pickNode(bool &IsTopNode) = 0; - /// \brief Scheduler callback to notify that a new subtree is scheduled. + /// Scheduler callback to notify that a new subtree is scheduled. virtual void scheduleTree(unsigned SubtreeID) {} /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an @@ -318,11 +318,11 @@ public: Mutations.push_back(std::move(Mutation)); } - /// \brief True if an edge can be added from PredSU to SuccSU without creating + /// True if an edge can be added from PredSU to SuccSU without creating /// a cycle. bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); - /// \brief Add a DAG edge to the given SU with the given predecessor + /// Add a DAG edge to the given SU with the given predecessor /// dependence data. /// /// \returns true if the edge may be added without creating a cycle OR if an @@ -374,7 +374,7 @@ protected: /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. void placeDebugValues(); - /// \brief dump the scheduled Sequence. + /// dump the scheduled Sequence. void dumpSchedule() const; // Lesser helpers... @@ -445,7 +445,7 @@ public: /// Return true if this DAG supports VReg liveness and RegPressure. bool hasVRegLiveness() const override { return true; } - /// \brief Return true if register pressure tracking is enabled. + /// Return true if register pressure tracking is enabled. bool isTrackingPressure() const { return ShouldTrackPressure; } /// Get current register pressure for the top scheduled instructions. @@ -897,6 +897,28 @@ protected: #endif }; +// Utility functions used by heuristics in tryCandidate(). 
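+//
+// For instance, an overridden tryCandidate() might use them like this (an
+// illustrative sketch mirroring the shape of GenericScheduler::tryCandidate;
+// TryCand, Cand, TRI and DAG are the names used in that context):
+//
+//   if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand,
+//                   Cand, GenericSchedulerBase::RegExcess, TRI, DAG->MF))
+//     return;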
+bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone); +bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF); +unsigned getWeakLeft(const SUnit *SU, bool isTop); +int biasPhysRegCopy(const SUnit *SU, bool isTop); + /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class GenericScheduler : public GenericSchedulerBase { @@ -963,9 +985,8 @@ protected: const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone); + virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h deleted file mode 100644 index b452684757f6..000000000000 --- a/include/llvm/CodeGen/MachineValueType.h +++ /dev/null @@ -1,1055 +0,0 @@ -//===- CodeGen/MachineValueType.h - Machine-Level types ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the set of machine-level target independent types which -// legal values in the code generator use. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINEVALUETYPE_H -#define LLVM_CODEGEN_MACHINEVALUETYPE_H - -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> - -namespace llvm { - - class Type; - - /// Machine Value Type. Every type that is supported natively by some - /// processor targeted by LLVM occurs here. This means that any legal value - /// type can be represented by an MVT. - class MVT { - public: - enum SimpleValueType : uint8_t { - // Simple value types that aren't explicitly part of this enumeration - // are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = 0, - - // If you change this numbering, you must change the values in - // ValueTypes.td as well! 
- Other = 1, // This is a non-standard value - i1 = 2, // This is a 1 bit integer value - i8 = 3, // This is an 8 bit integer value - i16 = 4, // This is a 16 bit integer value - i32 = 5, // This is a 32 bit integer value - i64 = 6, // This is a 64 bit integer value - i128 = 7, // This is a 128 bit integer value - - FIRST_INTEGER_VALUETYPE = i1, - LAST_INTEGER_VALUETYPE = i128, - - f16 = 8, // This is a 16 bit floating point value - f32 = 9, // This is a 32 bit floating point value - f64 = 10, // This is a 64 bit floating point value - f80 = 11, // This is a 80 bit floating point value - f128 = 12, // This is a 128 bit floating point value - ppcf128 = 13, // This is a PPC 128-bit floating point value - - FIRST_FP_VALUETYPE = f16, - LAST_FP_VALUETYPE = ppcf128, - - v1i1 = 14, // 1 x i1 - v2i1 = 15, // 2 x i1 - v4i1 = 16, // 4 x i1 - v8i1 = 17, // 8 x i1 - v16i1 = 18, // 16 x i1 - v32i1 = 19, // 32 x i1 - v64i1 = 20, // 64 x i1 - v128i1 = 21, // 128 x i1 - v512i1 = 22, // 512 x i1 - v1024i1 = 23, // 1024 x i1 - - v1i8 = 24, // 1 x i8 - v2i8 = 25, // 2 x i8 - v4i8 = 26, // 4 x i8 - v8i8 = 27, // 8 x i8 - v16i8 = 28, // 16 x i8 - v32i8 = 29, // 32 x i8 - v64i8 = 30, // 64 x i8 - v128i8 = 31, //128 x i8 - v256i8 = 32, //256 x i8 - - v1i16 = 33, // 1 x i16 - v2i16 = 34, // 2 x i16 - v4i16 = 35, // 4 x i16 - v8i16 = 36, // 8 x i16 - v16i16 = 37, // 16 x i16 - v32i16 = 38, // 32 x i16 - v64i16 = 39, // 64 x i16 - v128i16 = 40, //128 x i16 - - v1i32 = 41, // 1 x i32 - v2i32 = 42, // 2 x i32 - v4i32 = 43, // 4 x i32 - v8i32 = 44, // 8 x i32 - v16i32 = 45, // 16 x i32 - v32i32 = 46, // 32 x i32 - v64i32 = 47, // 64 x i32 - - v1i64 = 48, // 1 x i64 - v2i64 = 49, // 2 x i64 - v4i64 = 50, // 4 x i64 - v8i64 = 51, // 8 x i64 - v16i64 = 52, // 16 x i64 - v32i64 = 53, // 32 x i64 - - v1i128 = 54, // 1 x i128 - - // Scalable integer types - nxv1i1 = 55, // n x 1 x i1 - nxv2i1 = 56, // n x 2 x i1 - nxv4i1 = 57, // n x 4 x i1 - nxv8i1 = 58, // n x 8 x i1 - nxv16i1 = 59, // n x 16 x i1 - nxv32i1 = 60, // n x 32 x i1 - - nxv1i8 = 61, // n x 1 x i8 - nxv2i8 = 62, // n x 2 x i8 - nxv4i8 = 63, // n x 4 x i8 - nxv8i8 = 64, // n x 8 x i8 - nxv16i8 = 65, // n x 16 x i8 - nxv32i8 = 66, // n x 32 x i8 - - nxv1i16 = 67, // n x 1 x i16 - nxv2i16 = 68, // n x 2 x i16 - nxv4i16 = 69, // n x 4 x i16 - nxv8i16 = 70, // n x 8 x i16 - nxv16i16 = 71, // n x 16 x i16 - nxv32i16 = 72, // n x 32 x i16 - - nxv1i32 = 73, // n x 1 x i32 - nxv2i32 = 74, // n x 2 x i32 - nxv4i32 = 75, // n x 4 x i32 - nxv8i32 = 76, // n x 8 x i32 - nxv16i32 = 77, // n x 16 x i32 - nxv32i32 = 78, // n x 32 x i32 - - nxv1i64 = 79, // n x 1 x i64 - nxv2i64 = 80, // n x 2 x i64 - nxv4i64 = 81, // n x 4 x i64 - nxv8i64 = 82, // n x 8 x i64 - nxv16i64 = 83, // n x 16 x i64 - nxv32i64 = 84, // n x 32 x i64 - - FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, - LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, - - FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, - LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - - v2f16 = 85, // 2 x f16 - v4f16 = 86, // 4 x f16 - v8f16 = 87, // 8 x f16 - v1f32 = 88, // 1 x f32 - v2f32 = 89, // 2 x f32 - v4f32 = 90, // 4 x f32 - v8f32 = 91, // 8 x f32 - v16f32 = 92, // 16 x f32 - v1f64 = 93, // 1 x f64 - v2f64 = 94, // 2 x f64 - v4f64 = 95, // 4 x f64 - v8f64 = 96, // 8 x f64 - - nxv2f16 = 97, // n x 2 x f16 - nxv4f16 = 98, // n x 4 x f16 - nxv8f16 = 99, // n x 8 x f16 - nxv1f32 = 100, // n x 1 x f32 - nxv2f32 = 101, // n x 2 x f32 - nxv4f32 = 102, // n x 4 x f32 - nxv8f32 = 103, // n x 8 x f32 - nxv16f32 = 104, // n x 16 x f32 - nxv1f64 = 105, // n x 1 
x f64 - nxv2f64 = 106, // n x 2 x f64 - nxv4f64 = 107, // n x 4 x f64 - nxv8f64 = 108, // n x 8 x f64 - - FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = nxv8f64, - - FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, - LAST_FP_SCALABLE_VALUETYPE = nxv8f64, - - FIRST_VECTOR_VALUETYPE = v1i1, - LAST_VECTOR_VALUETYPE = nxv8f64, - - x86mmx = 109, // This is an X86 MMX value - - Glue = 110, // This glues nodes together during pre-RA sched - - isVoid = 111, // This has no value - - Untyped = 112, // This value takes a register, but has - // unspecified type. The register class - // will be determined by the opcode. - - FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 113, // This always remains at the end of the list. - - // This is the current maximum for LAST_VALUETYPE. - // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors - // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 128, - - // A value of type llvm::TokenTy - token = 248, - - // This is MDNode or MDString. - Metadata = 249, - - // An int value the size of the pointer of the current - // target to any address space. This must only be used internal to - // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 250, - - // A vector with any length and element size. This is used - // for intrinsics that have overloadings based on vector types. - // This is only for tblgen's consumption! - vAny = 251, - - // Any floating-point or vector floating-point value. This is used - // for intrinsics that have overloadings based on floating-point types. - // This is only for tblgen's consumption! - fAny = 252, - - // An integer or vector integer value of any bit width. This is - // used for intrinsics that have overloadings based on integer bit widths. - // This is only for tblgen's consumption! - iAny = 253, - - // An int value the size of the pointer of the current - // target. This should only be used internal to tblgen! - iPTR = 254, - - // Any type. This is used for intrinsics that have overloadings. - // This is only for tblgen's consumption! 
- Any = 255 - }; - - SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE; - - // A class to represent the number of elements in a vector - // - // For fixed-length vectors, the total number of elements is equal to 'Min' - // For scalable vectors, the total number of elements is a multiple of 'Min' - class ElementCount { - public: - unsigned Min; - bool Scalable; - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - - ElementCount& operator*=(unsigned RHS) { - Min *= RHS; - return *this; - } - - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - ElementCount& operator/=(unsigned RHS) { - Min /= RHS; - return *this; - } - - bool operator==(const ElementCount& RHS) { - return Min == RHS.Min && Scalable == RHS.Scalable; - } - }; - - constexpr MVT() = default; - constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} - - bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; } - bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; } - bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; } - bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; } - bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; } - bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; } - - /// Return true if this is a valid simple valuetype. - bool isValid() const { - return (SimpleTy >= MVT::FIRST_VALUETYPE && - SimpleTy < MVT::LAST_VALUETYPE); - } - - /// Return true if this is a FP or a vector FP type. - bool isFloatingPoint() const { - return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE)); - } - - /// Return true if this is an integer or a vector integer type. - bool isInteger() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || - (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE)); - } - - /// Return true if this is an integer, not including vectors. - bool isScalarInteger() const { - return (SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VALUETYPE); - } - - /// Return true if this is a vector value type. - bool isVector() const { - return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); - } - - /// Return true if this is a vector value type where the - /// runtime length is machine dependent - bool isScalableVector() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); - } - - /// Return true if this is a 16-bit vector type. - bool is16BitVector() const { - return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || - SimpleTy == MVT::v16i1); - } - - /// Return true if this is a 32-bit vector type. - bool is32BitVector() const { - return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 || - SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 || - SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32); - } - - /// Return true if this is a 64-bit vector type. 
- bool is64BitVector() const { - return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 || - SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 || - SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 || - SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64); - } - - /// Return true if this is a 128-bit vector type. - bool is128BitVector() const { - return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || - SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || - SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || - SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 || - SimpleTy == MVT::v2f64); - } - - /// Return true if this is a 256-bit vector type. - bool is256BitVector() const { - return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64); - } - - /// Return true if this is a 512-bit vector type. - bool is512BitVector() const { - return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || - SimpleTy == MVT::v8i64); - } - - /// Return true if this is a 1024-bit vector type. - bool is1024BitVector() const { - return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 || - SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || - SimpleTy == MVT::v16i64); - } - - /// Return true if this is a 1024-bit vector type. - bool is2048BitVector() const { - return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 || - SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64); - } - - /// Return true if this is an overloaded type for TableGen. - bool isOverloaded() const { - return (SimpleTy==MVT::Any || - SimpleTy==MVT::iAny || SimpleTy==MVT::fAny || - SimpleTy==MVT::vAny || SimpleTy==MVT::iPTRAny); - } - - /// Returns true if the given vector is a power of 2. - bool isPow2VectorType() const { - unsigned NElts = getVectorNumElements(); - return !(NElts & (NElts - 1)); - } - - /// Widens the length of the given vector MVT up to the nearest power of 2 - /// and returns that type. - MVT getPow2VectorType() const { - if (isPow2VectorType()) - return *this; - - unsigned NElts = getVectorNumElements(); - unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return MVT::getVectorVT(getVectorElementType(), Pow2NElts); - } - - /// If this is a vector, return the element type, otherwise return this. - MVT getScalarType() const { - return isVector() ? 
getVectorElementType() : *this; - } - - MVT getVectorElementType() const { - switch (SimpleTy) { - default: - llvm_unreachable("Not a vector MVT!"); - case v1i1: - case v2i1: - case v4i1: - case v8i1: - case v16i1: - case v32i1: - case v64i1: - case v128i1: - case v512i1: - case v1024i1: - case nxv1i1: - case nxv2i1: - case nxv4i1: - case nxv8i1: - case nxv16i1: - case nxv32i1: return i1; - case v1i8: - case v2i8: - case v4i8: - case v8i8: - case v16i8: - case v32i8: - case v64i8: - case v128i8: - case v256i8: - case nxv1i8: - case nxv2i8: - case nxv4i8: - case nxv8i8: - case nxv16i8: - case nxv32i8: return i8; - case v1i16: - case v2i16: - case v4i16: - case v8i16: - case v16i16: - case v32i16: - case v64i16: - case v128i16: - case nxv1i16: - case nxv2i16: - case nxv4i16: - case nxv8i16: - case nxv16i16: - case nxv32i16: return i16; - case v1i32: - case v2i32: - case v4i32: - case v8i32: - case v16i32: - case v32i32: - case v64i32: - case nxv1i32: - case nxv2i32: - case nxv4i32: - case nxv8i32: - case nxv16i32: - case nxv32i32: return i32; - case v1i64: - case v2i64: - case v4i64: - case v8i64: - case v16i64: - case v32i64: - case nxv1i64: - case nxv2i64: - case nxv4i64: - case nxv8i64: - case nxv16i64: - case nxv32i64: return i64; - case v1i128: return i128; - case v2f16: - case v4f16: - case v8f16: - case nxv2f16: - case nxv4f16: - case nxv8f16: return f16; - case v1f32: - case v2f32: - case v4f32: - case v8f32: - case v16f32: - case nxv1f32: - case nxv2f32: - case nxv4f32: - case nxv8f32: - case nxv16f32: return f32; - case v1f64: - case v2f64: - case v4f64: - case v8f64: - case nxv1f64: - case nxv2f64: - case nxv4f64: - case nxv8f64: return f64; - } - } - - unsigned getVectorNumElements() const { - switch (SimpleTy) { - default: - llvm_unreachable("Not a vector MVT!"); - case v1024i1: return 1024; - case v512i1: return 512; - case v256i8: return 256; - case v128i1: - case v128i8: - case v128i16: return 128; - case v64i1: - case v64i8: - case v64i16: - case v64i32: return 64; - case v32i1: - case v32i8: - case v32i16: - case v32i32: - case v32i64: - case nxv32i1: - case nxv32i8: - case nxv32i16: - case nxv32i32: - case nxv32i64: return 32; - case v16i1: - case v16i8: - case v16i16: - case v16i32: - case v16i64: - case v16f32: - case nxv16i1: - case nxv16i8: - case nxv16i16: - case nxv16i32: - case nxv16i64: - case nxv16f32: return 16; - case v8i1: - case v8i8: - case v8i16: - case v8i32: - case v8i64: - case v8f16: - case v8f32: - case v8f64: - case nxv8i1: - case nxv8i8: - case nxv8i16: - case nxv8i32: - case nxv8i64: - case nxv8f16: - case nxv8f32: - case nxv8f64: return 8; - case v4i1: - case v4i8: - case v4i16: - case v4i32: - case v4i64: - case v4f16: - case v4f32: - case v4f64: - case nxv4i1: - case nxv4i8: - case nxv4i16: - case nxv4i32: - case nxv4i64: - case nxv4f16: - case nxv4f32: - case nxv4f64: return 4; - case v2i1: - case v2i8: - case v2i16: - case v2i32: - case v2i64: - case v2f16: - case v2f32: - case v2f64: - case nxv2i1: - case nxv2i8: - case nxv2i16: - case nxv2i32: - case nxv2i64: - case nxv2f16: - case nxv2f32: - case nxv2f64: return 2; - case v1i1: - case v1i8: - case v1i16: - case v1i32: - case v1i64: - case v1i128: - case v1f32: - case v1f64: - case nxv1i1: - case nxv1i8: - case nxv1i16: - case nxv1i32: - case nxv1i64: - case nxv1f32: - case nxv1f64: return 1; - } - } - - MVT::ElementCount getVectorElementCount() const { - return { getVectorNumElements(), isScalableVector() }; - } - - unsigned getSizeInBits() const { - switch (SimpleTy) { - default: - 
llvm_unreachable("getSizeInBits called on extended MVT."); - case Other: - llvm_unreachable("Value type is non-standard value, Other."); - case iPTR: - llvm_unreachable("Value type size is target-dependent. Ask TLI."); - case iPTRAny: - case iAny: - case fAny: - case vAny: - case Any: - llvm_unreachable("Value type is overloaded."); - case token: - llvm_unreachable("Token type is a sentinel that cannot be used " - "in codegen and has no size"); - case Metadata: - llvm_unreachable("Value type is metadata."); - case i1: - case v1i1: - case nxv1i1: return 1; - case v2i1: - case nxv2i1: return 2; - case v4i1: - case nxv4i1: return 4; - case i8 : - case v1i8: - case v8i1: - case nxv1i8: - case nxv8i1: return 8; - case i16 : - case f16: - case v16i1: - case v2i8: - case v1i16: - case nxv16i1: - case nxv2i8: - case nxv1i16: return 16; - case f32 : - case i32 : - case v32i1: - case v4i8: - case v2i16: - case v2f16: - case v1f32: - case v1i32: - case nxv32i1: - case nxv4i8: - case nxv2i16: - case nxv1i32: - case nxv2f16: - case nxv1f32: return 32; - case x86mmx: - case f64 : - case i64 : - case v64i1: - case v8i8: - case v4i16: - case v2i32: - case v1i64: - case v4f16: - case v2f32: - case v1f64: - case nxv8i8: - case nxv4i16: - case nxv2i32: - case nxv1i64: - case nxv4f16: - case nxv2f32: - case nxv1f64: return 64; - case f80 : return 80; - case f128: - case ppcf128: - case i128: - case v128i1: - case v16i8: - case v8i16: - case v4i32: - case v2i64: - case v1i128: - case v8f16: - case v4f32: - case v2f64: - case nxv16i8: - case nxv8i16: - case nxv4i32: - case nxv2i64: - case nxv8f16: - case nxv4f32: - case nxv2f64: return 128; - case v32i8: - case v16i16: - case v8i32: - case v4i64: - case v8f32: - case v4f64: - case nxv32i8: - case nxv16i16: - case nxv8i32: - case nxv4i64: - case nxv8f32: - case nxv4f64: return 256; - case v512i1: - case v64i8: - case v32i16: - case v16i32: - case v8i64: - case v16f32: - case v8f64: - case nxv32i16: - case nxv16i32: - case nxv8i64: - case nxv16f32: - case nxv8f64: return 512; - case v1024i1: - case v128i8: - case v64i16: - case v32i32: - case v16i64: - case nxv32i32: - case nxv16i64: return 1024; - case v256i8: - case v128i16: - case v64i32: - case v32i64: - case nxv32i64: return 2048; - } - } - - unsigned getScalarSizeInBits() const { - return getScalarType().getSizeInBits(); - } - - /// Return the number of bytes overwritten by a store of the specified value - /// type. - unsigned getStoreSize() const { - return (getSizeInBits() + 7) / 8; - } - - /// Return the number of bits overwritten by a store of the specified value - /// type. - unsigned getStoreSizeInBits() const { - return getStoreSize() * 8; - } - - /// Return true if this has more bits than VT. - bool bitsGT(MVT VT) const { - return getSizeInBits() > VT.getSizeInBits(); - } - - /// Return true if this has no less bits than VT. - bool bitsGE(MVT VT) const { - return getSizeInBits() >= VT.getSizeInBits(); - } - - /// Return true if this has less bits than VT. - bool bitsLT(MVT VT) const { - return getSizeInBits() < VT.getSizeInBits(); - } - - /// Return true if this has no more bits than VT. 
- bool bitsLE(MVT VT) const { - return getSizeInBits() <= VT.getSizeInBits(); - } - - static MVT getFloatingPointVT(unsigned BitWidth) { - switch (BitWidth) { - default: - llvm_unreachable("Bad bit width!"); - case 16: - return MVT::f16; - case 32: - return MVT::f32; - case 64: - return MVT::f64; - case 80: - return MVT::f80; - case 128: - return MVT::f128; - } - } - - static MVT getIntegerVT(unsigned BitWidth) { - switch (BitWidth) { - default: - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - case 1: - return MVT::i1; - case 8: - return MVT::i8; - case 16: - return MVT::i16; - case 32: - return MVT::i32; - case 64: - return MVT::i64; - case 128: - return MVT::i128; - } - } - - static MVT getVectorVT(MVT VT, unsigned NumElements) { - switch (VT.SimpleTy) { - default: - break; - case MVT::i1: - if (NumElements == 1) return MVT::v1i1; - if (NumElements == 2) return MVT::v2i1; - if (NumElements == 4) return MVT::v4i1; - if (NumElements == 8) return MVT::v8i1; - if (NumElements == 16) return MVT::v16i1; - if (NumElements == 32) return MVT::v32i1; - if (NumElements == 64) return MVT::v64i1; - if (NumElements == 128) return MVT::v128i1; - if (NumElements == 512) return MVT::v512i1; - if (NumElements == 1024) return MVT::v1024i1; - break; - case MVT::i8: - if (NumElements == 1) return MVT::v1i8; - if (NumElements == 2) return MVT::v2i8; - if (NumElements == 4) return MVT::v4i8; - if (NumElements == 8) return MVT::v8i8; - if (NumElements == 16) return MVT::v16i8; - if (NumElements == 32) return MVT::v32i8; - if (NumElements == 64) return MVT::v64i8; - if (NumElements == 128) return MVT::v128i8; - if (NumElements == 256) return MVT::v256i8; - break; - case MVT::i16: - if (NumElements == 1) return MVT::v1i16; - if (NumElements == 2) return MVT::v2i16; - if (NumElements == 4) return MVT::v4i16; - if (NumElements == 8) return MVT::v8i16; - if (NumElements == 16) return MVT::v16i16; - if (NumElements == 32) return MVT::v32i16; - if (NumElements == 64) return MVT::v64i16; - if (NumElements == 128) return MVT::v128i16; - break; - case MVT::i32: - if (NumElements == 1) return MVT::v1i32; - if (NumElements == 2) return MVT::v2i32; - if (NumElements == 4) return MVT::v4i32; - if (NumElements == 8) return MVT::v8i32; - if (NumElements == 16) return MVT::v16i32; - if (NumElements == 32) return MVT::v32i32; - if (NumElements == 64) return MVT::v64i32; - break; - case MVT::i64: - if (NumElements == 1) return MVT::v1i64; - if (NumElements == 2) return MVT::v2i64; - if (NumElements == 4) return MVT::v4i64; - if (NumElements == 8) return MVT::v8i64; - if (NumElements == 16) return MVT::v16i64; - if (NumElements == 32) return MVT::v32i64; - break; - case MVT::i128: - if (NumElements == 1) return MVT::v1i128; - break; - case MVT::f16: - if (NumElements == 2) return MVT::v2f16; - if (NumElements == 4) return MVT::v4f16; - if (NumElements == 8) return MVT::v8f16; - break; - case MVT::f32: - if (NumElements == 1) return MVT::v1f32; - if (NumElements == 2) return MVT::v2f32; - if (NumElements == 4) return MVT::v4f32; - if (NumElements == 8) return MVT::v8f32; - if (NumElements == 16) return MVT::v16f32; - break; - case MVT::f64: - if (NumElements == 1) return MVT::v1f64; - if (NumElements == 2) return MVT::v2f64; - if (NumElements == 4) return MVT::v4f64; - if (NumElements == 8) return MVT::v8f64; - break; - } - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - } - - static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { - switch(VT.SimpleTy) { - default: - break; - case MVT::i1: 
- if (NumElements == 1) return MVT::nxv1i1; - if (NumElements == 2) return MVT::nxv2i1; - if (NumElements == 4) return MVT::nxv4i1; - if (NumElements == 8) return MVT::nxv8i1; - if (NumElements == 16) return MVT::nxv16i1; - if (NumElements == 32) return MVT::nxv32i1; - break; - case MVT::i8: - if (NumElements == 1) return MVT::nxv1i8; - if (NumElements == 2) return MVT::nxv2i8; - if (NumElements == 4) return MVT::nxv4i8; - if (NumElements == 8) return MVT::nxv8i8; - if (NumElements == 16) return MVT::nxv16i8; - if (NumElements == 32) return MVT::nxv32i8; - break; - case MVT::i16: - if (NumElements == 1) return MVT::nxv1i16; - if (NumElements == 2) return MVT::nxv2i16; - if (NumElements == 4) return MVT::nxv4i16; - if (NumElements == 8) return MVT::nxv8i16; - if (NumElements == 16) return MVT::nxv16i16; - if (NumElements == 32) return MVT::nxv32i16; - break; - case MVT::i32: - if (NumElements == 1) return MVT::nxv1i32; - if (NumElements == 2) return MVT::nxv2i32; - if (NumElements == 4) return MVT::nxv4i32; - if (NumElements == 8) return MVT::nxv8i32; - if (NumElements == 16) return MVT::nxv16i32; - if (NumElements == 32) return MVT::nxv32i32; - break; - case MVT::i64: - if (NumElements == 1) return MVT::nxv1i64; - if (NumElements == 2) return MVT::nxv2i64; - if (NumElements == 4) return MVT::nxv4i64; - if (NumElements == 8) return MVT::nxv8i64; - if (NumElements == 16) return MVT::nxv16i64; - if (NumElements == 32) return MVT::nxv32i64; - break; - case MVT::f16: - if (NumElements == 2) return MVT::nxv2f16; - if (NumElements == 4) return MVT::nxv4f16; - if (NumElements == 8) return MVT::nxv8f16; - break; - case MVT::f32: - if (NumElements == 1) return MVT::nxv1f32; - if (NumElements == 2) return MVT::nxv2f32; - if (NumElements == 4) return MVT::nxv4f32; - if (NumElements == 8) return MVT::nxv8f32; - if (NumElements == 16) return MVT::nxv16f32; - break; - case MVT::f64: - if (NumElements == 1) return MVT::nxv1f64; - if (NumElements == 2) return MVT::nxv2f64; - if (NumElements == 4) return MVT::nxv4f64; - if (NumElements == 8) return MVT::nxv8f64; - break; - } - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - } - - static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) { - if (IsScalable) - return getScalableVectorVT(VT, NumElements); - return getVectorVT(VT, NumElements); - } - - static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { - if (EC.Scalable) - return getScalableVectorVT(VT, EC.Min); - return getVectorVT(VT, EC.Min); - } - - /// Return the value type corresponding to the specified type. This returns - /// all pointers as iPTR. If HandleUnknown is true, unknown types are - /// returned as Other, otherwise they are invalid. - static MVT getVT(Type *Ty, bool HandleUnknown = false); - - private: - /// A simple iterator over the MVT::SimpleValueType enum. - struct mvt_iterator { - SimpleValueType VT; - - mvt_iterator(SimpleValueType VT) : VT(VT) {} - - MVT operator*() const { return VT; } - bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; } - - mvt_iterator& operator++() { - VT = (MVT::SimpleValueType)((int)VT + 1); - assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE && - "MVT iterator overflowed."); - return *this; - } - }; - - /// A range of the MVT::SimpleValueType enum. 
- using mvt_range = iterator_range<mvt_iterator>; - - public: - /// SimpleValueType Iteration - /// @{ - static mvt_range all_valuetypes() { - return mvt_range(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE); - } - - static mvt_range integer_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VALUETYPE + 1)); - } - - static mvt_range fp_valuetypes() { - return mvt_range(MVT::FIRST_FP_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VALUETYPE + 1)); - } - - static mvt_range vector_valuetypes() { - return mvt_range(MVT::FIRST_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1)); - } - - static mvt_range integer_vector_valuetypes() { - return mvt_range( - MVT::FIRST_INTEGER_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1)); - } - - static mvt_range fp_vector_valuetypes() { - return mvt_range( - MVT::FIRST_FP_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); - } - - static mvt_range integer_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); - } - - static mvt_range fp_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); - } - /// @} - }; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H diff --git a/include/llvm/CodeGen/MacroFusion.h b/include/llvm/CodeGen/MacroFusion.h index dc105fdc68fd..a77226ddaf33 100644 --- a/include/llvm/CodeGen/MacroFusion.h +++ b/include/llvm/CodeGen/MacroFusion.h @@ -25,7 +25,7 @@ class ScheduleDAGMutation; class TargetInstrInfo; class TargetSubtargetInfo; -/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if /// SecondMI may be part of a fused pair at all. using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII, @@ -33,13 +33,13 @@ using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII, const MachineInstr *FirstMI, const MachineInstr &SecondMI)>; -/// \brief Create a DAG scheduling mutation to pair instructions back to back +/// Create a DAG scheduling mutation to pair instructions back to back /// for instructions that benefit according to the target-specific /// shouldScheduleAdjacent predicate function. std::unique_ptr<ScheduleDAGMutation> createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); -/// \brief Create a DAG scheduling mutation to pair branch instructions with one +/// Create a DAG scheduling mutation to pair branch instructions with one /// of their predecessors back to back for instructions that benefit according /// to the target-specific shouldScheduleAdjacent predicate function. std::unique_ptr<ScheduleDAGMutation> diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index e94878ced10d..a6d88b057dcb 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -29,12 +29,12 @@ namespace PBQP { using NodeId = unsigned; using EdgeId = unsigned; - /// @brief Returns a value representing an invalid (non-existent) node. + /// Returns a value representing an invalid (non-existent) node. 
static NodeId invalidNodeId() { return std::numeric_limits<NodeId>::max(); } - /// @brief Returns a value representing an invalid (non-existent) edge. + /// Returns a value representing an invalid (non-existent) edge. static EdgeId invalidEdgeId() { return std::numeric_limits<EdgeId>::max(); } @@ -338,19 +338,19 @@ namespace PBQP { const NodeEntry &NE; }; - /// @brief Construct an empty PBQP graph. + /// Construct an empty PBQP graph. Graph() = default; - /// @brief Construct an empty PBQP graph with the given graph metadata. + /// Construct an empty PBQP graph with the given graph metadata. Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {} - /// @brief Get a reference to the graph metadata. + /// Get a reference to the graph metadata. GraphMetadata& getMetadata() { return Metadata; } - /// @brief Get a const-reference to the graph metadata. + /// Get a const-reference to the graph metadata. const GraphMetadata& getMetadata() const { return Metadata; } - /// @brief Lock this graph to the given solver instance in preparation + /// Lock this graph to the given solver instance in preparation /// for running the solver. This method will call solver.handleAddNode for /// each node in the graph, and handleAddEdge for each edge, to give the /// solver an opportunity to set up any requried metadata. @@ -363,13 +363,13 @@ namespace PBQP { Solver->handleAddEdge(EId); } - /// @brief Release from solver instance. + /// Release from solver instance. void unsetSolver() { assert(Solver && "Solver not set."); Solver = nullptr; } - /// @brief Add a node with the given costs. + /// Add a node with the given costs. /// @param Costs Cost vector for the new node. /// @return Node iterator for the added node. template <typename OtherVectorT> @@ -382,7 +382,7 @@ namespace PBQP { return NId; } - /// @brief Add a node bypassing the cost allocator. + /// Add a node bypassing the cost allocator. /// @param Costs Cost vector ptr for the new node (must be convertible to /// VectorPtr). /// @return Node iterator for the added node. @@ -401,7 +401,7 @@ namespace PBQP { return NId; } - /// @brief Add an edge between the given nodes with the given costs. + /// Add an edge between the given nodes with the given costs. /// @param N1Id First node. /// @param N2Id Second node. /// @param Costs Cost matrix for new edge. @@ -419,7 +419,7 @@ namespace PBQP { return EId; } - /// @brief Add an edge bypassing the cost allocator. + /// Add an edge bypassing the cost allocator. /// @param N1Id First node. /// @param N2Id Second node. /// @param Costs Cost matrix for new edge. @@ -444,7 +444,7 @@ namespace PBQP { return EId; } - /// @brief Returns true if the graph is empty. + /// Returns true if the graph is empty. bool empty() const { return NodeIdSet(*this).empty(); } NodeIdSet nodeIds() const { return NodeIdSet(*this); } @@ -452,15 +452,15 @@ namespace PBQP { AdjEdgeIdSet adjEdgeIds(NodeId NId) { return AdjEdgeIdSet(getNode(NId)); } - /// @brief Get the number of nodes in the graph. + /// Get the number of nodes in the graph. /// @return Number of nodes in the graph. unsigned getNumNodes() const { return NodeIdSet(*this).size(); } - /// @brief Get the number of edges in the graph. + /// Get the number of edges in the graph. /// @return Number of edges in the graph. unsigned getNumEdges() const { return EdgeIdSet(*this).size(); } - /// @brief Set a node's cost vector. + /// Set a node's cost vector. /// @param NId Node to update. /// @param Costs New costs to set. 
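    ///
    /// For example (a sketch assuming a graph G and a valid node id NId):
    ///
    /// \code
    ///   G.setNodeCosts(NId, Vector(2, 0.0));
    /// \endcode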
template <typename OtherVectorT> @@ -471,7 +471,7 @@ namespace PBQP { getNode(NId).Costs = AllocatedCosts; } - /// @brief Get a VectorPtr to a node's cost vector. Rarely useful - use + /// Get a VectorPtr to a node's cost vector. Rarely useful - use /// getNodeCosts where possible. /// @param NId Node id. /// @return VectorPtr to node cost vector. @@ -483,7 +483,7 @@ namespace PBQP { return getNode(NId).Costs; } - /// @brief Get a node's cost vector. + /// Get a node's cost vector. /// @param NId Node id. /// @return Node cost vector. const Vector& getNodeCosts(NodeId NId) const { @@ -502,7 +502,7 @@ namespace PBQP { return getNode(NId).getAdjEdgeIds().size(); } - /// @brief Update an edge's cost matrix. + /// Update an edge's cost matrix. /// @param EId Edge id. /// @param Costs New cost matrix. template <typename OtherMatrixT> @@ -513,7 +513,7 @@ namespace PBQP { getEdge(EId).Costs = AllocatedCosts; } - /// @brief Get a MatrixPtr to a node's cost matrix. Rarely useful - use + /// Get a MatrixPtr to a node's cost matrix. Rarely useful - use /// getEdgeCosts where possible. /// @param EId Edge id. /// @return MatrixPtr to edge cost matrix. @@ -525,7 +525,7 @@ namespace PBQP { return getEdge(EId).Costs; } - /// @brief Get an edge's cost matrix. + /// Get an edge's cost matrix. /// @param EId Edge id. /// @return Edge cost matrix. const Matrix& getEdgeCosts(EdgeId EId) const { @@ -540,21 +540,21 @@ namespace PBQP { return getEdge(EId).Metadata; } - /// @brief Get the first node connected to this edge. + /// Get the first node connected to this edge. /// @param EId Edge id. /// @return The first node connected to the given edge. NodeId getEdgeNode1Id(EdgeId EId) const { return getEdge(EId).getN1Id(); } - /// @brief Get the second node connected to this edge. + /// Get the second node connected to this edge. /// @param EId Edge id. /// @return The second node connected to the given edge. NodeId getEdgeNode2Id(EdgeId EId) const { return getEdge(EId).getN2Id(); } - /// @brief Get the "other" node connected to this edge. + /// Get the "other" node connected to this edge. /// @param EId Edge id. /// @param NId Node id for the "given" node. /// @return The iterator for the "other" node connected to this edge. @@ -566,7 +566,7 @@ namespace PBQP { return E.getN1Id(); } - /// @brief Get the edge connecting two nodes. + /// Get the edge connecting two nodes. /// @param N1Id First node id. /// @param N2Id Second node id. /// @return An id for edge (N1Id, N2Id) if such an edge exists, @@ -581,7 +581,7 @@ namespace PBQP { return invalidEdgeId(); } - /// @brief Remove a node from the graph. + /// Remove a node from the graph. /// @param NId Node id. void removeNode(NodeId NId) { if (Solver) @@ -598,7 +598,7 @@ namespace PBQP { FreeNodeIds.push_back(NId); } - /// @brief Disconnect an edge from the given node. + /// Disconnect an edge from the given node. /// /// Removes the given edge from the adjacency list of the given node. /// This operation leaves the edge in an 'asymmetric' state: It will no @@ -631,14 +631,14 @@ namespace PBQP { E.disconnectFrom(*this, NId); } - /// @brief Convenience method to disconnect all neighbours from the given + /// Convenience method to disconnect all neighbours from the given /// node. void disconnectAllNeighborsFromNode(NodeId NId) { for (auto AEId : adjEdgeIds(NId)) disconnectEdge(AEId, getEdgeOtherNodeId(AEId, NId)); } - /// @brief Re-attach an edge to its nodes. + /// Re-attach an edge to its nodes. 
/// /// Adds an edge that had been previously disconnected back into the /// adjacency set of the nodes that the edge connects. @@ -649,7 +649,7 @@ namespace PBQP { Solver->handleReconnectEdge(EId, NId); } - /// @brief Remove an edge from the graph. + /// Remove an edge from the graph. /// @param EId Edge id. void removeEdge(EdgeId EId) { if (Solver) @@ -660,7 +660,7 @@ namespace PBQP { Edges[EId].invalidate(); } - /// @brief Remove all nodes and edges from the graph. + /// Remove all nodes and edges from the graph. void clear() { Nodes.clear(); FreeNodeIds.clear(); diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index ba405e816d10..d1432a3053c4 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -22,34 +22,34 @@ namespace PBQP { using PBQPNum = float; -/// \brief PBQP Vector class. +/// PBQP Vector class. class Vector { friend hash_code hash_value(const Vector &); public: - /// \brief Construct a PBQP vector of the given size. + /// Construct a PBQP vector of the given size. explicit Vector(unsigned Length) : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {} - /// \brief Construct a PBQP vector with initializer. + /// Construct a PBQP vector with initializer. Vector(unsigned Length, PBQPNum InitVal) : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) { std::fill(Data.get(), Data.get() + Length, InitVal); } - /// \brief Copy construct a PBQP vector. + /// Copy construct a PBQP vector. Vector(const Vector &V) : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) { std::copy(V.Data.get(), V.Data.get() + Length, Data.get()); } - /// \brief Move construct a PBQP vector. + /// Move construct a PBQP vector. Vector(Vector &&V) : Length(V.Length), Data(std::move(V.Data)) { V.Length = 0; } - /// \brief Comparison operator. + /// Comparison operator. bool operator==(const Vector &V) const { assert(Length != 0 && Data && "Invalid vector"); if (Length != V.Length) @@ -57,27 +57,27 @@ public: return std::equal(Data.get(), Data.get() + Length, V.Data.get()); } - /// \brief Return the length of the vector + /// Return the length of the vector unsigned getLength() const { assert(Length != 0 && Data && "Invalid vector"); return Length; } - /// \brief Element access. + /// Element access. PBQPNum& operator[](unsigned Index) { assert(Length != 0 && Data && "Invalid vector"); assert(Index < Length && "Vector element access out of bounds."); return Data[Index]; } - /// \brief Const element access. + /// Const element access. const PBQPNum& operator[](unsigned Index) const { assert(Length != 0 && Data && "Invalid vector"); assert(Index < Length && "Vector element access out of bounds."); return Data[Index]; } - /// \brief Add another vector to this one. + /// Add another vector to this one. Vector& operator+=(const Vector &V) { assert(Length != 0 && Data && "Invalid vector"); assert(Length == V.Length && "Vector length mismatch."); @@ -86,7 +86,7 @@ public: return *this; } - /// \brief Returns the index of the minimum value in this vector + /// Returns the index of the minimum value in this vector unsigned minIndex() const { assert(Length != 0 && Data && "Invalid vector"); return std::min_element(Data.get(), Data.get() + Length) - Data.get(); @@ -97,14 +97,14 @@ private: std::unique_ptr<PBQPNum []> Data; }; -/// \brief Return a hash_value for the given vector. +/// Return a hash_value for the given vector. 
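///
/// For example (sketch):
///
/// \code
///   Vector V(3, 1.0);
///   hash_code H = hash_value(V);
/// \endcode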
inline hash_code hash_value(const Vector &V) { unsigned *VBegin = reinterpret_cast<unsigned*>(V.Data.get()); unsigned *VEnd = reinterpret_cast<unsigned*>(V.Data.get() + V.Length); return hash_combine(V.Length, hash_combine_range(VBegin, VEnd)); } -/// \brief Output a textual representation of the given vector on the given +/// Output a textual representation of the given vector on the given /// output stream. template <typename OStream> OStream& operator<<(OStream &OS, const Vector &V) { @@ -118,18 +118,18 @@ OStream& operator<<(OStream &OS, const Vector &V) { return OS; } -/// \brief PBQP Matrix class +/// PBQP Matrix class class Matrix { private: friend hash_code hash_value(const Matrix &); public: - /// \brief Construct a PBQP Matrix with the given dimensions. + /// Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { } - /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// Construct a PBQP Matrix with the given dimensions and initial /// value. Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal) : Rows(Rows), Cols(Cols), @@ -137,20 +137,20 @@ public: std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal); } - /// \brief Copy construct a PBQP matrix. + /// Copy construct a PBQP matrix. Matrix(const Matrix &M) : Rows(M.Rows), Cols(M.Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get()); } - /// \brief Move construct a PBQP matrix. + /// Move construct a PBQP matrix. Matrix(Matrix &&M) : Rows(M.Rows), Cols(M.Cols), Data(std::move(M.Data)) { M.Rows = M.Cols = 0; } - /// \brief Comparison operator. + /// Comparison operator. bool operator==(const Matrix &M) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); if (Rows != M.Rows || Cols != M.Cols) @@ -158,33 +158,33 @@ public: return std::equal(Data.get(), Data.get() + (Rows * Cols), M.Data.get()); } - /// \brief Return the number of rows in this matrix. + /// Return the number of rows in this matrix. unsigned getRows() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); return Rows; } - /// \brief Return the number of cols in this matrix. + /// Return the number of cols in this matrix. unsigned getCols() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); return Cols; } - /// \brief Matrix element access. + /// Matrix element access. PBQPNum* operator[](unsigned R) { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(R < Rows && "Row out of bounds."); return Data.get() + (R * Cols); } - /// \brief Matrix element access. + /// Matrix element access. const PBQPNum* operator[](unsigned R) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(R < Rows && "Row out of bounds."); return Data.get() + (R * Cols); } - /// \brief Returns the given row as a vector. + /// Returns the given row as a vector. Vector getRowAsVector(unsigned R) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Vector V(Cols); @@ -193,7 +193,7 @@ public: return V; } - /// \brief Returns the given column as a vector. + /// Returns the given column as a vector. Vector getColAsVector(unsigned C) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Vector V(Rows); @@ -202,7 +202,7 @@ public: return V; } - /// \brief Matrix transpose. + /// Matrix transpose. 
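  ///
  /// For example (sketch):
  ///
  /// \code
  ///   Matrix M(2, 3, 1.0);
  ///   Matrix T = M.transpose(); // T.getRows() == 3, T.getCols() == 2
  /// \endcode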
Matrix transpose() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Matrix M(Cols, Rows); @@ -212,7 +212,7 @@ public: return M; } - /// \brief Add the given matrix to this one. + /// Add the given matrix to this one. Matrix& operator+=(const Matrix &M) { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(Rows == M.Rows && Cols == M.Cols && @@ -234,7 +234,7 @@ private: std::unique_ptr<PBQPNum []> Data; }; -/// \brief Return a hash_code for the given matrix. +/// Return a hash_code for the given matrix. inline hash_code hash_value(const Matrix &M) { unsigned *MBegin = reinterpret_cast<unsigned*>(M.Data.get()); unsigned *MEnd = @@ -242,7 +242,7 @@ inline hash_code hash_value(const Matrix &M) { return hash_combine(M.Rows, M.Cols, hash_combine_range(MBegin, MEnd)); } -/// \brief Output a textual representation of the given matrix on the given +/// Output a textual representation of the given matrix on the given /// output stream. template <typename OStream> OStream& operator<<(OStream &OS, const Matrix &M) { diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h index 8aeb51936760..21b99027970d 100644 --- a/include/llvm/CodeGen/PBQP/ReductionRules.h +++ b/include/llvm/CodeGen/PBQP/ReductionRules.h @@ -23,7 +23,7 @@ namespace llvm { namespace PBQP { - /// \brief Reduce a node of degree one. + /// Reduce a node of degree one. /// /// Propagate costs from the given node, which must be of degree one, to its /// neighbor. Notify the problem domain. @@ -166,7 +166,7 @@ namespace PBQP { } #endif - // \brief Find a solution to a fully reduced graph by backpropagation. + // Find a solution to a fully reduced graph by backpropagation. // // Given a graph and a reduction order, pop each node from the reduction // order and greedily compute a minimum solution based on the node costs, and diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index 6a247277fdfa..4d4379fbc2c2 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -21,7 +21,7 @@ namespace llvm { namespace PBQP { - /// \brief Represents a solution to a PBQP problem. + /// Represents a solution to a PBQP problem. /// /// To get the selection for each node in the problem use the getSelection method. class Solution { @@ -30,17 +30,17 @@ namespace PBQP { SelectionsMap selections; public: - /// \brief Initialise an empty solution. + /// Initialise an empty solution. Solution() = default; - /// \brief Set the selection for a given node. + /// Set the selection for a given node. /// @param nodeId Node id. /// @param selection Selection for nodeId. void setSelection(GraphBase::NodeId nodeId, unsigned selection) { selections[nodeId] = selection; } - /// \brief Get a node's selection. + /// Get a node's selection. /// @param nodeId Node id. /// @return The selection for nodeId; unsigned getSelection(GraphBase::NodeId nodeId) const { diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h index 269b7a7b3a35..995467dc56d8 100644 --- a/include/llvm/CodeGen/PBQPRAConstraint.h +++ b/include/llvm/CodeGen/PBQPRAConstraint.h @@ -33,7 +33,7 @@ class PBQPRAGraph; using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph; -/// @brief Abstract base for classes implementing PBQP register allocation +/// Abstract base for classes implementing PBQP register allocation /// constraints (e.g. Spill-costs, interference, coalescing). 
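// (Illustrative aside, not part of the patch.) A concrete constraint subclasses the interface declared below and rewrites costs in its graph hook; the hook itself is elided from this hunk, so the apply() signature here is an assumption, and the subclass is hypothetical:
//
//   struct ZeroCostConstraint : PBQPRAConstraint {
//     void apply(PBQPRAGraph &G) override {
//       // A real constraint would adjust node cost Vectors and edge cost
//       // Matrices in G, e.g. to model spill costs or interference.
//     }
//   };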
class PBQPRAConstraint { public: @@ -44,7 +44,7 @@ private: virtual void anchor(); }; -/// @brief PBQP register allocation constraint composer. +/// PBQP register allocation constraint composer. /// /// Constraints added to this list will be applied, in the order that they are /// added, to the PBQP graph. diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h index 14ef0ec408ba..dbf09ea31e20 100644 --- a/include/llvm/CodeGen/ParallelCG.h +++ b/include/llvm/CodeGen/ParallelCG.h @@ -40,7 +40,7 @@ std::unique_ptr<Module> splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs, ArrayRef<llvm::raw_pwrite_stream *> BCOSs, const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, - TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile, + TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile, bool PreserveLocals = false); } // namespace llvm diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 4370d116e08c..cb12b14f4435 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -154,6 +154,9 @@ namespace llvm { /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// This pass performs post-RA machine sinking for COPY instructions. + extern char &PostRAMachineSinkingID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -212,6 +215,10 @@ namespace llvm { /// into tails of their predecessors. extern char &TailDuplicateID; + /// Duplicate blocks with unconditional branches into tails of their + /// predecessors. Variant that works before register allocation. + extern char &EarlyTailDuplicateID; + /// MachineTraceMetrics - This pass computes critical path and CPU resource /// usage in an ensemble of traces. extern char &MachineTraceMetricsID; @@ -269,9 +276,13 @@ namespace llvm { /// memory operations. extern char &ImplicitNullChecksID; - /// MachineLICM - This pass performs LICM on machine instructions. + /// This pass performs loop invariant code motion on machine instructions. extern char &MachineLICMID; + /// This pass performs loop invariant code motion on machine instructions. + /// This variant works before register allocation. \see MachineLICMID. + extern char &EarlyMachineLICMID; + /// MachineSinking - This pass performs sinking on machine instructions. extern char &MachineSinkingID; @@ -290,7 +301,7 @@ namespace llvm { /// StackSlotColoring - This pass performs stack slot coloring. extern char &StackSlotColoringID; - /// \brief This pass lays out funclets contiguously. + /// This pass lays out funclets contiguously. extern char &FuncletLayoutID; /// This pass inserts the XRay instrumentation sleds if they are supported by @@ -300,7 +311,7 @@ namespace llvm { /// This pass inserts FEntry calls extern char &FEntryInserterID; - /// \brief This pass implements the "patchable-function" attribute. + /// This pass implements the "patchable-function" attribute. extern char &PatchableFunctionID; /// createStackProtectorPass - This pass adds stack protectors to functions. @@ -318,13 +329,17 @@ namespace llvm { /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities.
- FunctionPass *createWinEHPass(); + FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow. /// FunctionPass *createSjLjEHPreparePass(); + /// createWasmEHPass - This pass adapts exception handling code to use + /// WebAssembly's exception handling scheme. + FunctionPass *createWasmEHPass(); + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack /// slots relative to one another and allocates base registers to access them /// when it is estimated by the target to be out of range of normal frame @@ -369,7 +384,7 @@ namespace llvm { /// ModulePass *createLowerEmuTLSPass(); - /// This pass lowers the @llvm.load.relative intrinsic to instructions. + /// This pass lowers the \@llvm.load.relative intrinsic to instructions. /// This is unsafe to do earlier because a pass may combine the constant /// initializer into the load, which may result in an overflowing evaluation. ModulePass *createPreISelIntrinsicLoweringPass(); @@ -408,7 +423,7 @@ namespace llvm { /// This pass performs outlining on machine instructions directly before /// printing assembly. - ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs = false); + ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true); /// This pass expands the experimental reduction intrinsics into sequences of /// shuffles. @@ -417,6 +432,15 @@ namespace llvm { // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); + /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp + FunctionPass *createBreakFalseDeps(); + + // This pass expands indirectbr instructions. + FunctionPass *createIndirectBrExpandPass(); + + /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp + FunctionPass *createCFIInstrInserter(); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/ReachingDefAnalysis.h b/include/llvm/CodeGen/ReachingDefAnalysis.h new file mode 100644 index 000000000000..b21b745c8fd1 --- /dev/null +++ b/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -0,0 +1,118 @@ +//==--- llvm/CodeGen/ReachingDefAnalysis.h - Reaching Def Analysis -*- C++ -*---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Reaching Defs Analysis pass. +/// +/// This pass tracks, for each instruction, the "closest" reaching def of +/// a given register. It is used by BreakFalseDeps (for clearance calculation) +/// and ExecutionDomainFix (for arbitrating conflicting domains). +/// +/// Note that this is different from the usual notion of liveness. +/// The CPU doesn't care whether or not we consider a register killed. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REACHINGDEFSANALYSIS_H +#define LLVM_CODEGEN_REACHINGDEFSANALYSIS_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; + +/// This class provides the reaching def analysis.
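// (Illustrative aside, not part of the patch.) A client such as BreakFalseDeps might query the analysis like this; the helper function is hypothetical, but the two getters match the declarations just below:
//
//   void rateDependency(ReachingDefAnalysis &RDA, MachineInstr &MI,
//                       MCPhysReg PhysReg) {
//     // Block-relative instruction id of the closest def reaching MI.
//     int DefId = RDA.getReachingDef(&MI, PhysReg);
//     // Instructions executed since that def; a large clearance means a
//     // false dependency on PhysReg is cheap to break.
//     int Clearance = RDA.getClearance(&MI, PhysReg);
//     (void)DefId; (void)Clearance;
//   }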
+class ReachingDefAnalysis : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetRegisterInfo *TRI; + unsigned NumRegUnits; + /// Instruction that defined each register, relative to the beginning of the + /// current basic block. When a LiveRegsDefInfo is used to represent a + /// live-out register, this value is relative to the end of the basic block, + /// so it will be a negative number. + using LiveRegsDefInfo = std::vector<int>; + LiveRegsDefInfo LiveRegs; + + /// Keeps clearance information for all registers. Note that this + /// is different from the usual notion of liveness. The CPU + /// doesn't care whether or not we consider a register killed. + using OutRegsInfoMap = SmallVector<LiveRegsDefInfo, 4>; + OutRegsInfoMap MBBOutRegsInfos; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// Maps instructions to their instruction Ids, relative to the beginning of + /// their basic blocks. + DenseMap<MachineInstr *, int> InstIds; + + /// All reaching defs of a given RegUnit for a given MBB. + using MBBRegUnitDefs = SmallVector<int, 1>; + /// All reaching defs of all reg units for a given MBB. + using MBBDefsInfo = std::vector<MBBRegUnitDefs>; + /// All reaching defs of all reg units for all MBBs. + using MBBReachingDefsInfo = SmallVector<MBBDefsInfo, 4>; + MBBReachingDefsInfo MBBReachingDefs; + + /// Default values are 'nothing happened a long time ago'. + const int ReachingDefDefaultVal = -(1 << 20); + +public: + static char ID; // Pass identification, replacement for typeid + + ReachingDefAnalysis() : MachineFunctionPass(ID) { + initializeReachingDefAnalysisPass(*PassRegistry::getPassRegistry()); + } + void releaseMemory() override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + /// Provides the instruction id of the closest reaching def instruction of + /// PhysReg that reaches MI, relative to the beginning of MI's basic block. + int getReachingDef(MachineInstr *MI, int PhysReg); + + /// Provides the clearance - the number of instructions since the closest + /// reaching def instruction of PhysReg that reaches MI. + int getClearance(MachineInstr *MI, MCPhysReg PhysReg); + +private: + /// Set up LiveRegs by merging predecessor live-out values. + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update live-out values. + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Process the given basic block. + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update def-ages for registers defined by MI. + /// Also break dependencies on partial defs and undef uses. + void processDefs(MachineInstr *); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REACHINGDEFSANALYSIS_H diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 5b342863eb50..ba9763077d09 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -43,10 +43,10 @@ class raw_ostream; namespace PBQP { namespace RegAlloc { -/// @brief Spill option index. +/// Spill option index.
inline unsigned getSpillOptionIdx() { return 0; } -/// \brief Metadata to speed allocatability test. +/// Metadata to speed allocatability test. /// /// Keeps track of the number of infinities in each row and column. class MatrixMetadata { @@ -89,7 +89,7 @@ private: std::unique_ptr<bool[]> UnsafeCols; }; -/// \brief Holds a vector of the allowed physical regs for a vreg. +/// Holds a vector of the allowed physical regs for a vreg. class AllowedRegVector { friend hash_code hash_value(const AllowedRegVector &); @@ -127,7 +127,7 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { hash_combine_range(OStart, OEnd)); } -/// \brief Holds graph-level metadata relevant to PBQP RA problems. +/// Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: using AllowedRegVecPool = ValuePool<AllowedRegVector>; @@ -164,7 +164,7 @@ private: AllowedRegVecPool AllowedRegVecs; }; -/// \brief Holds solver state and other metadata relevant to each PBQP RA node. +/// Holds solver state and other metadata relevant to each PBQP RA node. class NodeMetadata { public: using AllowedRegVector = RegAlloc::AllowedRegVector; @@ -505,14 +505,14 @@ private: public: PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {} - /// @brief Dump this graph to dbgs(). + /// Dump this graph to dbgs(). void dump() const; - /// @brief Dump this graph to an output stream. + /// Dump this graph to an output stream. /// @param OS Output stream to print on. void dump(raw_ostream &OS) const; - /// @brief Print a representation of this graph in DOT format. + /// Print a representation of this graph in DOT format. /// @param OS Output stream to print on. void printDot(raw_ostream &OS) const; }; @@ -527,7 +527,7 @@ inline Solution solve(PBQPRAGraph& G) { } // end namespace RegAlloc } // end namespace PBQP -/// @brief Create a PBQP register allocator instance. +/// Create a PBQP register allocator instance. FunctionPass * createPBQPRegisterAllocator(char *customPassID = nullptr); diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 2b14b78d621d..79054b9e33b7 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -171,10 +171,10 @@ class RegisterOperands { public: /// List of virtual registers and register units read by the instruction. SmallVector<RegisterMaskPair, 8> Uses; - /// \brief List of virtual registers and register units defined by the + /// List of virtual registers and register units defined by the /// instruction which are not dead. SmallVector<RegisterMaskPair, 8> Defs; - /// \brief List of virtual registers and register units defined by the + /// List of virtual registers and register units defined by the /// instruction but dead. SmallVector<RegisterMaskPair, 8> DeadDefs; @@ -219,7 +219,7 @@ public: return const_cast<PressureDiffs*>(this)->operator[](Idx); } - /// \brief Record pressure difference induced by the given operand list to + /// Record pressure difference induced by the given operand list to /// node with index \p Idx. void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI); @@ -546,7 +546,7 @@ protected: /// Add Reg to the live in set and increase max pressure. void discoverLiveIn(RegisterMaskPair Pair); - /// \brief Get the SlotIndex for the first nondebug instruction including or + /// Get the SlotIndex for the first nondebug instruction including or /// after the current position. 
SlotIndex getCurrSlot() const; diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 489c72b81a98..b6bd028a8cac 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -127,7 +127,7 @@ public: /// Find an unused register of the specified register class. /// Return 0 if none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const; + unsigned FindUnusedReg(const TargetRegisterClass *RC) const; /// Add a scavenging frame index. void addScavengingFrameIndex(int FI) { @@ -158,7 +158,7 @@ public: /// Returns the scavenged register. /// This is deprecated as it depends on the quality of the kill flags being /// present; use scavengeRegisterBackwards() instead! - unsigned scavengeRegister(const TargetRegisterClass *RegClass, + unsigned scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj); unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) { return scavengeRegister(RegClass, MBBI, SPAdj); @@ -218,7 +218,7 @@ private: /// Spill a register after position \p After and reload it before position /// \p UseMI. ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, - MachineBasicBlock::iterator After, + MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI); }; diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index eabadd8d784a..efd175eeed30 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -19,6 +19,7 @@ #ifndef LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -31,8 +32,6 @@ class Function; class TargetMachine; class PhysicalRegisterUsageInfo : public ImmutablePass { - virtual void anchor(); - public: static char ID; @@ -41,25 +40,20 @@ public: initializePhysicalRegisterUsageInfoPass(Registry); } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - /// To set TargetMachine *, which is used to print - /// analysis when command line option -print-regusage is used. - void setTargetMachine(const TargetMachine *TM_) { TM = TM_; } + /// Set TargetMachine which is used to print analysis. + void setTargetMachine(const TargetMachine &TM); bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; /// To store RegMask for given Function *. - void storeUpdateRegUsageInfo(const Function *FP, - std::vector<uint32_t> RegMask); + void storeUpdateRegUsageInfo(const Function &FP, + ArrayRef<uint32_t> RegMask); - /// To query stored RegMask for given Function *, it will return nullptr if - /// function is not known. - const std::vector<uint32_t> *getRegUsageInfo(const Function *FP); + /// To query stored RegMask for given Function *, it will return an empty + /// array if function is not known.
+ ArrayRef<uint32_t> getRegUsageInfo(const Function &FP); void print(raw_ostream &OS, const Module *M = nullptr) const override; diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 03166ccdfe38..8d582ee298b6 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -32,7 +32,7 @@ namespace llvm { ResourcePriorityQueue *PQ; explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {} - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(const SUnit* LHS, const SUnit* RHS) const; }; class ResourcePriorityQueue : public SchedulingPriorityQueue { @@ -121,7 +121,7 @@ namespace llvm { void remove(SUnit *SU) override; /// scheduledNode - Main resource tracking point. - void scheduledNode(SUnit *Node) override; + void scheduledNode(SUnit *SU) override; bool isResourceAvailable(SUnit *SU); void reserveResources(SUnit *SU); diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def deleted file mode 100644 index 7695e9d782ef..000000000000 --- a/include/llvm/CodeGen/RuntimeLibcalls.def +++ /dev/null @@ -1,495 +0,0 @@ -//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the runtime library calls the backend can emit. -// The various long double types cannot be merged, because 80-bit library -// functions use "xf" and 128-bit use "tf". -// -// When adding PPCF128 functions here, note that their names generally need -// to be overridden for Darwin with the xxx$LDBL128 form. See -// PPCISelLowering.cpp. -// -//===----------------------------------------------------------------------===// - -// NOTE: NO INCLUDE GUARD DESIRED! - -// Provide definitions of macros so that users of this file do not have to -// define everything to use it... - -// Declare the enumerator for each libcall, along with its default name. Some -// libcalls have different names on particular OSes or architectures. 
These -// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets -// using TargetLoweringBase::setLibcallName() -#ifndef HANDLE_LIBCALL -#error "HANDLE_LIBCALL must be defined" -#endif - -// Integer -HANDLE_LIBCALL(SHL_I16, "__ashlhi3") -HANDLE_LIBCALL(SHL_I32, "__ashlsi3") -HANDLE_LIBCALL(SHL_I64, "__ashldi3") -HANDLE_LIBCALL(SHL_I128, "__ashlti3") -HANDLE_LIBCALL(SRL_I16, "__lshrhi3") -HANDLE_LIBCALL(SRL_I32, "__lshrsi3") -HANDLE_LIBCALL(SRL_I64, "__lshrdi3") -HANDLE_LIBCALL(SRL_I128, "__lshrti3") -HANDLE_LIBCALL(SRA_I16, "__ashrhi3") -HANDLE_LIBCALL(SRA_I32, "__ashrsi3") -HANDLE_LIBCALL(SRA_I64, "__ashrdi3") -HANDLE_LIBCALL(SRA_I128, "__ashrti3") -HANDLE_LIBCALL(MUL_I8, "__mulqi3") -HANDLE_LIBCALL(MUL_I16, "__mulhi3") -HANDLE_LIBCALL(MUL_I32, "__mulsi3") -HANDLE_LIBCALL(MUL_I64, "__muldi3") -HANDLE_LIBCALL(MUL_I128, "__multi3") -HANDLE_LIBCALL(MULO_I32, "__mulosi4") -HANDLE_LIBCALL(MULO_I64, "__mulodi4") -HANDLE_LIBCALL(MULO_I128, "__muloti4") -HANDLE_LIBCALL(SDIV_I8, "__divqi3") -HANDLE_LIBCALL(SDIV_I16, "__divhi3") -HANDLE_LIBCALL(SDIV_I32, "__divsi3") -HANDLE_LIBCALL(SDIV_I64, "__divdi3") -HANDLE_LIBCALL(SDIV_I128, "__divti3") -HANDLE_LIBCALL(UDIV_I8, "__udivqi3") -HANDLE_LIBCALL(UDIV_I16, "__udivhi3") -HANDLE_LIBCALL(UDIV_I32, "__udivsi3") -HANDLE_LIBCALL(UDIV_I64, "__udivdi3") -HANDLE_LIBCALL(UDIV_I128, "__udivti3") -HANDLE_LIBCALL(SREM_I8, "__modqi3") -HANDLE_LIBCALL(SREM_I16, "__modhi3") -HANDLE_LIBCALL(SREM_I32, "__modsi3") -HANDLE_LIBCALL(SREM_I64, "__moddi3") -HANDLE_LIBCALL(SREM_I128, "__modti3") -HANDLE_LIBCALL(UREM_I8, "__umodqi3") -HANDLE_LIBCALL(UREM_I16, "__umodhi3") -HANDLE_LIBCALL(UREM_I32, "__umodsi3") -HANDLE_LIBCALL(UREM_I64, "__umoddi3") -HANDLE_LIBCALL(UREM_I128, "__umodti3") -HANDLE_LIBCALL(SDIVREM_I8, nullptr) -HANDLE_LIBCALL(SDIVREM_I16, nullptr) -HANDLE_LIBCALL(SDIVREM_I32, nullptr) -HANDLE_LIBCALL(SDIVREM_I64, nullptr) -HANDLE_LIBCALL(SDIVREM_I128, nullptr) -HANDLE_LIBCALL(UDIVREM_I8, nullptr) -HANDLE_LIBCALL(UDIVREM_I16, nullptr) -HANDLE_LIBCALL(UDIVREM_I32, nullptr) -HANDLE_LIBCALL(UDIVREM_I64, nullptr) -HANDLE_LIBCALL(UDIVREM_I128, nullptr) -HANDLE_LIBCALL(NEG_I32, "__negsi2") -HANDLE_LIBCALL(NEG_I64, "__negdi2") - -// Floating-point -HANDLE_LIBCALL(ADD_F32, "__addsf3") -HANDLE_LIBCALL(ADD_F64, "__adddf3") -HANDLE_LIBCALL(ADD_F80, "__addxf3") -HANDLE_LIBCALL(ADD_F128, "__addtf3") -HANDLE_LIBCALL(ADD_PPCF128, "__gcc_qadd") -HANDLE_LIBCALL(SUB_F32, "__subsf3") -HANDLE_LIBCALL(SUB_F64, "__subdf3") -HANDLE_LIBCALL(SUB_F80, "__subxf3") -HANDLE_LIBCALL(SUB_F128, "__subtf3") -HANDLE_LIBCALL(SUB_PPCF128, "__gcc_qsub") -HANDLE_LIBCALL(MUL_F32, "__mulsf3") -HANDLE_LIBCALL(MUL_F64, "__muldf3") -HANDLE_LIBCALL(MUL_F80, "__mulxf3") -HANDLE_LIBCALL(MUL_F128, "__multf3") -HANDLE_LIBCALL(MUL_PPCF128, "__gcc_qmul") -HANDLE_LIBCALL(DIV_F32, "__divsf3") -HANDLE_LIBCALL(DIV_F64, "__divdf3") -HANDLE_LIBCALL(DIV_F80, "__divxf3") -HANDLE_LIBCALL(DIV_F128, "__divtf3") -HANDLE_LIBCALL(DIV_PPCF128, "__gcc_qdiv") -HANDLE_LIBCALL(REM_F32, "fmodf") -HANDLE_LIBCALL(REM_F64, "fmod") -HANDLE_LIBCALL(REM_F80, "fmodl") -HANDLE_LIBCALL(REM_F128, "fmodl") -HANDLE_LIBCALL(REM_PPCF128, "fmodl") -HANDLE_LIBCALL(FMA_F32, "fmaf") -HANDLE_LIBCALL(FMA_F64, "fma") -HANDLE_LIBCALL(FMA_F80, "fmal") -HANDLE_LIBCALL(FMA_F128, "fmal") -HANDLE_LIBCALL(FMA_PPCF128, "fmal") -HANDLE_LIBCALL(POWI_F32, "__powisf2") -HANDLE_LIBCALL(POWI_F64, "__powidf2") -HANDLE_LIBCALL(POWI_F80, "__powixf2") -HANDLE_LIBCALL(POWI_F128, "__powitf2") -HANDLE_LIBCALL(POWI_PPCF128, "__powitf2") 
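// (Illustrative aside, not part of the patch.) Rows in a .def file like this one are consumed via the X-macro pattern: a client defines HANDLE_LIBCALL, includes the file, and gets one expansion per row. The RuntimeLibcalls.h hunk further down expands rows into enum values; a hypothetical name table would be built the same way:
//
//   static const char *const LibcallNames[] = {
//   #define HANDLE_LIBCALL(code, name) name,
//   #include "llvm/IR/RuntimeLibcalls.def"
//   #undef HANDLE_LIBCALL
//   };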
-HANDLE_LIBCALL(SQRT_F32, "sqrtf") -HANDLE_LIBCALL(SQRT_F64, "sqrt") -HANDLE_LIBCALL(SQRT_F80, "sqrtl") -HANDLE_LIBCALL(SQRT_F128, "sqrtl") -HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl") -HANDLE_LIBCALL(LOG_F32, "logf") -HANDLE_LIBCALL(LOG_F64, "log") -HANDLE_LIBCALL(LOG_F80, "logl") -HANDLE_LIBCALL(LOG_F128, "logl") -HANDLE_LIBCALL(LOG_PPCF128, "logl") -HANDLE_LIBCALL(LOG2_F32, "log2f") -HANDLE_LIBCALL(LOG2_F64, "log2") -HANDLE_LIBCALL(LOG2_F80, "log2l") -HANDLE_LIBCALL(LOG2_F128, "log2l") -HANDLE_LIBCALL(LOG2_PPCF128, "log2l") -HANDLE_LIBCALL(LOG10_F32, "log10f") -HANDLE_LIBCALL(LOG10_F64, "log10") -HANDLE_LIBCALL(LOG10_F80, "log10l") -HANDLE_LIBCALL(LOG10_F128, "log10l") -HANDLE_LIBCALL(LOG10_PPCF128, "log10l") -HANDLE_LIBCALL(EXP_F32, "expf") -HANDLE_LIBCALL(EXP_F64, "exp") -HANDLE_LIBCALL(EXP_F80, "expl") -HANDLE_LIBCALL(EXP_F128, "expl") -HANDLE_LIBCALL(EXP_PPCF128, "expl") -HANDLE_LIBCALL(EXP2_F32, "exp2f") -HANDLE_LIBCALL(EXP2_F64, "exp2") -HANDLE_LIBCALL(EXP2_F80, "exp2l") -HANDLE_LIBCALL(EXP2_F128, "exp2l") -HANDLE_LIBCALL(EXP2_PPCF128, "exp2l") -HANDLE_LIBCALL(SIN_F32, "sinf") -HANDLE_LIBCALL(SIN_F64, "sin") -HANDLE_LIBCALL(SIN_F80, "sinl") -HANDLE_LIBCALL(SIN_F128, "sinl") -HANDLE_LIBCALL(SIN_PPCF128, "sinl") -HANDLE_LIBCALL(COS_F32, "cosf") -HANDLE_LIBCALL(COS_F64, "cos") -HANDLE_LIBCALL(COS_F80, "cosl") -HANDLE_LIBCALL(COS_F128, "cosl") -HANDLE_LIBCALL(COS_PPCF128, "cosl") -HANDLE_LIBCALL(SINCOS_F32, nullptr) -HANDLE_LIBCALL(SINCOS_F64, nullptr) -HANDLE_LIBCALL(SINCOS_F80, nullptr) -HANDLE_LIBCALL(SINCOS_F128, nullptr) -HANDLE_LIBCALL(SINCOS_PPCF128, nullptr) -HANDLE_LIBCALL(SINCOS_STRET_F32, nullptr) -HANDLE_LIBCALL(SINCOS_STRET_F64, nullptr) -HANDLE_LIBCALL(POW_F32, "powf") -HANDLE_LIBCALL(POW_F64, "pow") -HANDLE_LIBCALL(POW_F80, "powl") -HANDLE_LIBCALL(POW_F128, "powl") -HANDLE_LIBCALL(POW_PPCF128, "powl") -HANDLE_LIBCALL(CEIL_F32, "ceilf") -HANDLE_LIBCALL(CEIL_F64, "ceil") -HANDLE_LIBCALL(CEIL_F80, "ceill") -HANDLE_LIBCALL(CEIL_F128, "ceill") -HANDLE_LIBCALL(CEIL_PPCF128, "ceill") -HANDLE_LIBCALL(TRUNC_F32, "truncf") -HANDLE_LIBCALL(TRUNC_F64, "trunc") -HANDLE_LIBCALL(TRUNC_F80, "truncl") -HANDLE_LIBCALL(TRUNC_F128, "truncl") -HANDLE_LIBCALL(TRUNC_PPCF128, "truncl") -HANDLE_LIBCALL(RINT_F32, "rintf") -HANDLE_LIBCALL(RINT_F64, "rint") -HANDLE_LIBCALL(RINT_F80, "rintl") -HANDLE_LIBCALL(RINT_F128, "rintl") -HANDLE_LIBCALL(RINT_PPCF128, "rintl") -HANDLE_LIBCALL(NEARBYINT_F32, "nearbyintf") -HANDLE_LIBCALL(NEARBYINT_F64, "nearbyint") -HANDLE_LIBCALL(NEARBYINT_F80, "nearbyintl") -HANDLE_LIBCALL(NEARBYINT_F128, "nearbyintl") -HANDLE_LIBCALL(NEARBYINT_PPCF128, "nearbyintl") -HANDLE_LIBCALL(ROUND_F32, "roundf") -HANDLE_LIBCALL(ROUND_F64, "round") -HANDLE_LIBCALL(ROUND_F80, "roundl") -HANDLE_LIBCALL(ROUND_F128, "roundl") -HANDLE_LIBCALL(ROUND_PPCF128, "roundl") -HANDLE_LIBCALL(FLOOR_F32, "floorf") -HANDLE_LIBCALL(FLOOR_F64, "floor") -HANDLE_LIBCALL(FLOOR_F80, "floorl") -HANDLE_LIBCALL(FLOOR_F128, "floorl") -HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") -HANDLE_LIBCALL(COPYSIGN_F32, "copysignf") -HANDLE_LIBCALL(COPYSIGN_F64, "copysign") -HANDLE_LIBCALL(COPYSIGN_F80, "copysignl") -HANDLE_LIBCALL(COPYSIGN_F128, "copysignl") -HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") -HANDLE_LIBCALL(FMIN_F32, "fminf") -HANDLE_LIBCALL(FMIN_F64, "fmin") -HANDLE_LIBCALL(FMIN_F80, "fminl") -HANDLE_LIBCALL(FMIN_F128, "fminl") -HANDLE_LIBCALL(FMIN_PPCF128, "fminl") -HANDLE_LIBCALL(FMAX_F32, "fmaxf") -HANDLE_LIBCALL(FMAX_F64, "fmax") -HANDLE_LIBCALL(FMAX_F80, "fmaxl") -HANDLE_LIBCALL(FMAX_F128, "fmaxl") 
-HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") - -// Conversion -HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") -HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") -HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") -HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") -HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") -HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") -HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") -HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") -HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") -HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2") -HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2") -HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2") -HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos") -HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") -HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") -HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") -HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") -HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") -HANDLE_LIBCALL(FPTOSINT_F64_I32, "__fixdfsi") -HANDLE_LIBCALL(FPTOSINT_F64_I64, "__fixdfdi") -HANDLE_LIBCALL(FPTOSINT_F64_I128, "__fixdfti") -HANDLE_LIBCALL(FPTOSINT_F80_I32, "__fixxfsi") -HANDLE_LIBCALL(FPTOSINT_F80_I64, "__fixxfdi") -HANDLE_LIBCALL(FPTOSINT_F80_I128, "__fixxfti") -HANDLE_LIBCALL(FPTOSINT_F128_I32, "__fixtfsi") -HANDLE_LIBCALL(FPTOSINT_F128_I64, "__fixtfdi") -HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti") -HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi") -HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi") -HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti") -HANDLE_LIBCALL(FPTOUINT_F64_I32, "__fixunsdfsi") -HANDLE_LIBCALL(FPTOUINT_F64_I64, "__fixunsdfdi") -HANDLE_LIBCALL(FPTOUINT_F64_I128, "__fixunsdfti") -HANDLE_LIBCALL(FPTOUINT_F80_I32, "__fixunsxfsi") -HANDLE_LIBCALL(FPTOUINT_F80_I64, "__fixunsxfdi") -HANDLE_LIBCALL(FPTOUINT_F80_I128, "__fixunsxfti") -HANDLE_LIBCALL(FPTOUINT_F128_I32, "__fixunstfsi") -HANDLE_LIBCALL(FPTOUINT_F128_I64, "__fixunstfdi") -HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti") -HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf") -HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf") -HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf") -HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf") -HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq") -HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf") -HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf") -HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf") -HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf") -HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf") -HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf") -HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf") -HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf") -HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf") -HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf") -HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf") -HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf") -HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf") -HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf") -HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq") -HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf") -HANDLE_LIBCALL(UINTTOFP_I64_F64, 
"__floatundidf") -HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf") -HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf") -HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf") -HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf") -HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") -HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") -HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") -HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") - -// Comparison -HANDLE_LIBCALL(OEQ_F32, "__eqsf2") -HANDLE_LIBCALL(OEQ_F64, "__eqdf2") -HANDLE_LIBCALL(OEQ_F128, "__eqtf2") -HANDLE_LIBCALL(OEQ_PPCF128, "__gcc_qeq") -HANDLE_LIBCALL(UNE_F32, "__nesf2") -HANDLE_LIBCALL(UNE_F64, "__nedf2") -HANDLE_LIBCALL(UNE_F128, "__netf2") -HANDLE_LIBCALL(UNE_PPCF128, "__gcc_qne") -HANDLE_LIBCALL(OGE_F32, "__gesf2") -HANDLE_LIBCALL(OGE_F64, "__gedf2") -HANDLE_LIBCALL(OGE_F128, "__getf2") -HANDLE_LIBCALL(OGE_PPCF128, "__gcc_qge") -HANDLE_LIBCALL(OLT_F32, "__ltsf2") -HANDLE_LIBCALL(OLT_F64, "__ltdf2") -HANDLE_LIBCALL(OLT_F128, "__lttf2") -HANDLE_LIBCALL(OLT_PPCF128, "__gcc_qlt") -HANDLE_LIBCALL(OLE_F32, "__lesf2") -HANDLE_LIBCALL(OLE_F64, "__ledf2") -HANDLE_LIBCALL(OLE_F128, "__letf2") -HANDLE_LIBCALL(OLE_PPCF128, "__gcc_qle") -HANDLE_LIBCALL(OGT_F32, "__gtsf2") -HANDLE_LIBCALL(OGT_F64, "__gtdf2") -HANDLE_LIBCALL(OGT_F128, "__gttf2") -HANDLE_LIBCALL(OGT_PPCF128, "__gcc_qgt") -HANDLE_LIBCALL(UO_F32, "__unordsf2") -HANDLE_LIBCALL(UO_F64, "__unorddf2") -HANDLE_LIBCALL(UO_F128, "__unordtf2") -HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord") -HANDLE_LIBCALL(O_F32, "__unordsf2") -HANDLE_LIBCALL(O_F64, "__unorddf2") -HANDLE_LIBCALL(O_F128, "__unordtf2") -HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord") - -// Memory -HANDLE_LIBCALL(MEMCPY, "memcpy") -HANDLE_LIBCALL(MEMMOVE, "memmove") -HANDLE_LIBCALL(MEMSET, "memset") -HANDLE_LIBCALL(BZERO, nullptr) - -// Element-wise unordered-atomic memory of different sizes -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memcpy_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memcpy_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memcpy_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memcpy_element_unordered_atomic_16") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memmove_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memmove_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memmove_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memmove_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memmove_element_unordered_atomic_16") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memset_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memset_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memset_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memset_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unordered_atomic_16") - -// Exception handling -HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume") - -// Note: there are two sets of atomics libcalls; see -// <https://llvm.org/docs/Atomics.html> for more info on the -// difference between them. 
- -// Atomic '__sync_*' libcalls. -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_1, "__sync_val_compare_and_swap_1") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_2, "__sync_val_compare_and_swap_2") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_4, "__sync_val_compare_and_swap_4") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_8, "__sync_val_compare_and_swap_8") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_16, "__sync_val_compare_and_swap_16") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_2, "__sync_fetch_and_add_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_4, "__sync_fetch_and_add_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_8, "__sync_fetch_and_add_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_16, "__sync_fetch_and_add_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_16, "__sync_fetch_and_sub_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_1, "__sync_fetch_and_and_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_2, "__sync_fetch_and_and_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_4, "__sync_fetch_and_and_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_8, "__sync_fetch_and_and_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_16, "__sync_fetch_and_and_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_1, "__sync_fetch_and_or_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_2, "__sync_fetch_and_or_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_4, "__sync_fetch_and_or_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_8, "__sync_fetch_and_or_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_16, "__sync_fetch_and_or_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_16, "__sync_fetch_and_xor_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_16, "__sync_fetch_and_nand_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_1, "__sync_fetch_and_max_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_2, "__sync_fetch_and_max_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_4, "__sync_fetch_and_max_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_8, "__sync_fetch_and_max_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_16, "__sync_fetch_and_max_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_1, "__sync_fetch_and_umax_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_2, "__sync_fetch_and_umax_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_4, "__sync_fetch_and_umax_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_8, "__sync_fetch_and_umax_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_16, "__sync_fetch_and_umax_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_1, "__sync_fetch_and_min_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_2, "__sync_fetch_and_min_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_4, 
"__sync_fetch_and_min_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_8, "__sync_fetch_and_min_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_16, "__sync_fetch_and_min_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_1, "__sync_fetch_and_umin_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_2, "__sync_fetch_and_umin_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_4, "__sync_fetch_and_umin_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_8, "__sync_fetch_and_umin_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_16, "__sync_fetch_and_umin_16") - -// Atomic `__atomic_*' libcalls. -HANDLE_LIBCALL(ATOMIC_LOAD, "__atomic_load") -HANDLE_LIBCALL(ATOMIC_LOAD_1, "__atomic_load_1") -HANDLE_LIBCALL(ATOMIC_LOAD_2, "__atomic_load_2") -HANDLE_LIBCALL(ATOMIC_LOAD_4, "__atomic_load_4") -HANDLE_LIBCALL(ATOMIC_LOAD_8, "__atomic_load_8") -HANDLE_LIBCALL(ATOMIC_LOAD_16, "__atomic_load_16") - -HANDLE_LIBCALL(ATOMIC_STORE, "__atomic_store") -HANDLE_LIBCALL(ATOMIC_STORE_1, "__atomic_store_1") -HANDLE_LIBCALL(ATOMIC_STORE_2, "__atomic_store_2") -HANDLE_LIBCALL(ATOMIC_STORE_4, "__atomic_store_4") -HANDLE_LIBCALL(ATOMIC_STORE_8, "__atomic_store_8") -HANDLE_LIBCALL(ATOMIC_STORE_16, "__atomic_store_16") - -HANDLE_LIBCALL(ATOMIC_EXCHANGE, "__atomic_exchange") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_1, "__atomic_exchange_1") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_2, "__atomic_exchange_2") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_4, "__atomic_exchange_4") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_8, "__atomic_exchange_8") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_16, "__atomic_exchange_16") - -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE, "__atomic_compare_exchange") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_1, "__atomic_compare_exchange_1") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_2, "__atomic_compare_exchange_2") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_4, "__atomic_compare_exchange_4") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_8, "__atomic_compare_exchange_8") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_16, "__atomic_compare_exchange_16") - -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_1, "__atomic_fetch_add_1") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_2, "__atomic_fetch_add_2") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_4, "__atomic_fetch_add_4") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_8, "__atomic_fetch_add_8") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_1, "__atomic_fetch_sub_1") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_2, "__atomic_fetch_sub_2") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_4, "__atomic_fetch_sub_4") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_8, "__atomic_fetch_sub_8") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_1, "__atomic_fetch_and_1") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_2, "__atomic_fetch_and_2") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_4, "__atomic_fetch_and_4") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_8, "__atomic_fetch_and_8") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_16, "__atomic_fetch_and_16") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_1, "__atomic_fetch_or_1") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_2, "__atomic_fetch_or_2") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_4, "__atomic_fetch_or_4") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_8, "__atomic_fetch_or_8") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_16, "__atomic_fetch_or_16") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_1, "__atomic_fetch_xor_1") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_2, "__atomic_fetch_xor_2") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_4, "__atomic_fetch_xor_4") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_8, "__atomic_fetch_xor_8") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_16, "__atomic_fetch_xor_16") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_1, "__atomic_fetch_nand_1") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_2, "__atomic_fetch_nand_2") 
-HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16") - -// Stack Protector Fail -HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail") - -// Deoptimization -HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize") - -HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) - -#undef HANDLE_LIBCALL diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 016bef1702c4..28567a1ce437 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -29,7 +29,7 @@ namespace RTLIB { /// enum Libcall { #define HANDLE_LIBCALL(code, name) code, - #include "RuntimeLibcalls.def" + #include "llvm/IR/RuntimeLibcalls.def" #undef HANDLE_LIBCALL }; diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index f3f2f05b877d..5e7837834ec8 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -76,7 +76,7 @@ class TargetRegisterInfo; }; private: - /// \brief A pointer to the depending/depended-on SUnit, and an enum + /// A pointer to the depending/depended-on SUnit, and an enum /// indicating the kind of the dependency. PointerIntPair<SUnit *, 2, Kind> Dep; @@ -137,7 +137,7 @@ class TargetRegisterInfo; return !operator==(Other); } - /// \brief Returns the latency value for this edge, which roughly means the + /// Returns the latency value for this edge, which roughly means the /// minimum number of cycles that must elapse between the predecessor and /// the successor, given that they have this edge between them. unsigned getLatency() const { @@ -163,7 +163,7 @@ class TargetRegisterInfo; return getKind() != Data; } - /// \brief Tests if this is an Order dependence between two memory accesses + /// Tests if this is an Order dependence between two memory accesses /// where both sides of the dependence access memory in non-volatile and /// fully modeled ways. bool isNormalMemory() const { @@ -181,7 +181,7 @@ class TargetRegisterInfo; return (isNormalMemory() || isBarrier()); } - /// \brief Tests if this is an Order dependence that is marked as + /// Tests if this is an Order dependence that is marked as /// "must alias", meaning that the SUnits at either end of the edge have a /// memory dependence on a known memory location. bool isMustAlias() const { @@ -196,13 +196,13 @@ class TargetRegisterInfo; return getKind() == Order && Contents.OrdKind >= Weak; } - /// \brief Tests if this is an Order dependence that is marked as + /// Tests if this is an Order dependence that is marked as /// "artificial", meaning it isn't necessary for correctness. bool isArtificial() const { return getKind() == Order && Contents.OrdKind == Artificial; } - /// \brief Tests if this is an Order dependence that is marked as "cluster", + /// Tests if this is an Order dependence that is marked as "cluster", /// meaning it is artificial and wants to be adjacent. bool isCluster() const { return getKind() == Order && Contents.OrdKind == Cluster; @@ -308,7 +308,7 @@ class TargetRegisterInfo; nullptr; ///< Is a special copy node if != nullptr. const TargetRegisterClass *CopySrcRC = nullptr; - /// \brief Constructs an SUnit for pre-regalloc scheduling to represent an + /// Constructs an SUnit for pre-regalloc scheduling to represent an /// SDNode and any nodes flagged to it. 
SUnit(SDNode *node, unsigned nodenum) : Node(node), NodeNum(nodenum), isVRegCycle(false), isCall(false), @@ -319,7 +319,7 @@ class TargetRegisterInfo; isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Constructs an SUnit for post-regalloc scheduling to represent a + /// Constructs an SUnit for post-regalloc scheduling to represent a /// MachineInstr. SUnit(MachineInstr *instr, unsigned nodenum) : Instr(instr), NodeNum(nodenum), isVRegCycle(false), isCall(false), @@ -330,7 +330,7 @@ class TargetRegisterInfo; isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Constructs a placeholder SUnit. + /// Constructs a placeholder SUnit. SUnit() : isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), isCommutable(false), hasPhysRegUses(false), hasPhysRegDefs(false), @@ -339,7 +339,7 @@ class TargetRegisterInfo; isCloned(false), isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Boundary nodes are placeholders for the boundary of the + /// Boundary nodes are placeholders for the boundary of the /// scheduling region. /// /// BoundaryNodes can have DAG edges, including Data edges, but they do not @@ -362,7 +362,7 @@ class TargetRegisterInfo; return Node; } - /// \brief Returns true if this SUnit refers to a machine instruction as + /// Returns true if this SUnit refers to a machine instruction as /// opposed to an SDNode. bool isInstr() const { return Instr; } @@ -384,7 +384,7 @@ class TargetRegisterInfo; /// It also adds the current node as a successor of the specified node. bool addPred(const SDep &D, bool Required = true); - /// \brief Adds a barrier edge to SU by calling addPred(), with latency 0 + /// Adds a barrier edge to SU by calling addPred(), with latency 0 /// generally or latency 1 for a store followed by a load. bool addPredBarrier(SUnit *SU) { SDep Dep(SU, SDep::Barrier); @@ -406,7 +406,7 @@ class TargetRegisterInfo; return Depth; } - /// \brief Returns the height of this node, which is the length of the + /// Returns the height of this node, which is the length of the /// maximum path down to any node which has no successors. unsigned getHeight() const { if (!isHeightCurrent) @@ -414,21 +414,21 @@ class TargetRegisterInfo; return Height; } - /// \brief If NewDepth is greater than this node's depth value, sets it to + /// If NewDepth is greater than this node's depth value, sets it to /// be the new depth value. This also recursively marks successor nodes /// dirty. void setDepthToAtLeast(unsigned NewDepth); - /// \brief If NewDepth is greater than this node's depth value, set it to be + /// If NewDepth is greater than this node's depth value, set it to be /// the new height value. This also recursively marks predecessor nodes /// dirty. void setHeightToAtLeast(unsigned NewHeight); - /// \brief Sets a flag in this node to indicate that its stored Depth value + /// Sets a flag in this node to indicate that its stored Depth value /// will require recomputation the next time getDepth() is called. void setDepthDirty(); - /// \brief Sets a flag in this node to indicate that its stored Height value + /// Sets a flag in this node to indicate that its stored Height value /// will require recomputation the next time getHeight() is called. 
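// (Illustrative aside, not part of the patch.) Depth and height are the two lazily computed DAG distances used above: getDepth() is the length of the longest path from a node with no predecessors down to this node, and getHeight() the longest path from this node down to a node with no successors. The dirty-flag setters here only invalidate the cached values, deferring recomputation to the next getter call.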
void setHeightDirty(); @@ -455,15 +455,15 @@ class TargetRegisterInfo; return NumSuccsLeft == 0; } - /// \brief Orders this node's predecessor edges such that the critical path + /// Orders this node's predecessor edges such that the critical path /// edge occurs first. void biasCriticalPath(); void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; raw_ostream &print(raw_ostream &O, - const SUnit *N = nullptr, - const SUnit *X = nullptr) const; + const SUnit *Entry = nullptr, + const SUnit *Exit = nullptr) const; raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const; private: @@ -497,7 +497,7 @@ class TargetRegisterInfo; //===--------------------------------------------------------------------===// - /// \brief This interface is used to plug different priorities computation + /// This interface is used to plug different priorities computation /// algorithms into the list scheduler. It implements the interface of a /// standard priority queue, where nodes are inserted in arbitrary order and /// returned in priority order. The computation of the priority and the @@ -609,7 +609,7 @@ class TargetRegisterInfo; virtual void addCustomGraphFeatures(GraphWriter<ScheduleDAG*> &) const {} #ifndef NDEBUG - /// \brief Verifies that all SUnits were scheduled and that their state is + /// Verifies that all SUnits were scheduled and that their state is /// consistent. Returns the number of scheduled SUnits. unsigned VerifyScheduledDAG(bool isBottomUp); #endif @@ -708,7 +708,7 @@ class TargetRegisterInfo; /// method. void DFS(const SUnit *SU, int UpperBound, bool& HasLoop); - /// \brief Reassigns topological indexes for the nodes in the DAG to + /// Reassigns topological indexes for the nodes in the DAG to /// preserve the topological ordering. void Shift(BitVector& Visited, int LowerBound, int UpperBound); @@ -735,11 +735,11 @@ class TargetRegisterInfo; /// Returns true if addPred(TargetSU, SU) creates a cycle. bool WillCreateCycle(SUnit *TargetSU, SUnit *SU); - /// \brief Updates the topological ordering to accommodate an edge to be + /// Updates the topological ordering to accommodate an edge to be /// added from SUnit \p X to SUnit \p Y. void AddPred(SUnit *Y, SUnit *X); - /// \brief Updates the topological ordering to accommodate an an edge to be + /// Updates the topological ordering to accommodate an edge to be /// removed from the specified node \p N from the predecessors of the /// current node \p M. void RemovePred(SUnit *M, SUnit *N); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 14882205584e..520a23846f6e 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -190,7 +190,7 @@ namespace llvm { using SUList = std::list<SUnit *>; protected: - /// \brief A map from ValueType to SUList, used during DAG construction, as + /// A map from ValueType to SUList, used during DAG construction, as /// a means of remembering which SUs depend on which memory locations. class Value2SUsMap; @@ -201,7 +201,7 @@ namespace llvm { void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N); - /// \brief Adds a chain edge between SUa and SUb, but only if both + /// Adds a chain edge between SUa and SUb, but only if both /// AliasAnalysis and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -286,7 +286,7 @@ namespace llvm { /// Cleans up after scheduling in the given block.
virtual void finishBlock(); - /// \brief Initialize the DAG and common scheduler state for a new + /// Initialize the DAG and common scheduler state for a new /// scheduling region. This does not actually create the DAG, only clears /// it. The scheduling driver may call BuildSchedGraph multiple times per /// scheduling region. @@ -308,7 +308,7 @@ namespace llvm { LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false); - /// \brief Adds dependencies from instructions in the current list of + /// Adds dependencies from instructions in the current list of /// instructions being scheduled to scheduling barrier. We want to make sure /// instructions which define registers that are either used by the /// terminator or are live-out are properly scheduled. This is especially diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h index d6a8c791392c..3ecc033ac35a 100644 --- a/include/llvm/CodeGen/ScheduleDFS.h +++ b/include/llvm/CodeGen/ScheduleDFS.h @@ -25,7 +25,7 @@ namespace llvm { class raw_ostream; -/// \brief Represent the ILP of the subDAG rooted at a DAG node. +/// Represent the ILP of the subDAG rooted at a DAG node. /// /// ILPValues summarize the DAG subtree rooted at each node. ILPValues are /// valid for all nodes regardless of their subtree membership. @@ -62,13 +62,13 @@ struct ILPValue { void dump() const; }; -/// \brief Compute the values of each DAG node for various metrics during DFS. +/// Compute the values of each DAG node for various metrics during DFS. class SchedDFSResult { friend class SchedDFSImpl; static const unsigned InvalidSubtreeID = ~0u; - /// \brief Per-SUnit data computed during DFS for various metrics. + /// Per-SUnit data computed during DFS for various metrics. /// /// A node's SubtreeID is set to itself when it is visited to indicate that it /// is the root of a subtree. Later it is set to its parent to indicate an @@ -81,7 +81,7 @@ class SchedDFSResult { NodeData() = default; }; - /// \brief Per-Subtree data computed during DFS. + /// Per-Subtree data computed during DFS. struct TreeData { unsigned ParentTreeID = InvalidSubtreeID; unsigned SubInstrCount = 0; @@ -89,7 +89,7 @@ class SchedDFSResult { TreeData() = default; }; - /// \brief Record a connection between subtrees and the connection level. + /// Record a connection between subtrees and the connection level. struct Connection { unsigned TreeID; unsigned Level; @@ -117,15 +117,15 @@ public: SchedDFSResult(bool IsBU, unsigned lim) : IsBottomUp(IsBU), SubtreeLimit(lim) {} - /// \brief Get the node cutoff before subtrees are considered significant. + /// Get the node cutoff before subtrees are considered significant. unsigned getSubtreeLimit() const { return SubtreeLimit; } - /// \brief Return true if this DFSResult is uninitialized. + /// Return true if this DFSResult is uninitialized. /// /// resize() initializes DFSResult, while compute() populates it. bool empty() const { return DFSNodeData.empty(); } - /// \brief Clear the results. + /// Clear the results. void clear() { DFSNodeData.clear(); DFSTreeData.clear(); @@ -133,37 +133,37 @@ public: SubtreeConnectLevels.clear(); } - /// \brief Initialize the result data with the size of the DAG. + /// Initialize the result data with the size of the DAG. void resize(unsigned NumSUnits) { DFSNodeData.resize(NumSUnits); } - /// \brief Compute various metrics for the DAG with given roots. + /// Compute various metrics for the DAG with given roots. 
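// (Illustrative aside, not part of the patch.) Pieced together from the comments above, the intended call sequence is resize() with the DAG size, then compute(), then per-node queries:
//
//   SchedDFSResult DFS(/*IsBU=*/true, /*lim=*/8);   // ctor shown above
//   DFS.resize(SUnits.size());
//   DFS.compute(SUnits);
//   ILPValue ILP = DFS.getILP(&SUnits.front());     // a leaf reports 1/1
//
// where SUnits is the scheduler's SUnit vector and the limit value is a placeholder.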
void compute(ArrayRef<SUnit> SUnits); - /// \brief Get the number of instructions in the given subtree and its + /// Get the number of instructions in the given subtree and its /// children. unsigned getNumInstrs(const SUnit *SU) const { return DFSNodeData[SU->NodeNum].InstrCount; } - /// \brief Get the number of instructions in the given subtree not including + /// Get the number of instructions in the given subtree not including /// children. unsigned getNumSubInstrs(unsigned SubtreeID) const { return DFSTreeData[SubtreeID].SubInstrCount; } - /// \brief Get the ILP value for a DAG node. + /// Get the ILP value for a DAG node. /// /// A leaf node has an ILP of 1/1. ILPValue getILP(const SUnit *SU) const { return ILPValue(DFSNodeData[SU->NodeNum].InstrCount, 1 + SU->getDepth()); } - /// \brief The number of subtrees detected in this DAG. + /// The number of subtrees detected in this DAG. unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); } - /// \brief Get the ID of the subtree the given DAG node belongs to. + /// Get the ID of the subtree the given DAG node belongs to. /// /// For convenience, if DFSResults have not been computed yet, give everything /// tree ID 0. @@ -174,7 +174,7 @@ public: return DFSNodeData[SU->NodeNum].SubtreeID; } - /// \brief Get the connection level of a subtree. + /// Get the connection level of a subtree. /// /// For bottom-up trees, the connection level is the latency depth (in cycles) /// of the deepest connection to another subtree. @@ -182,7 +182,7 @@ public: return SubtreeConnectLevels[SubtreeID]; } - /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is + /// Scheduler callback to update SubtreeConnectLevels when a tree is /// initially scheduled. void scheduleTree(unsigned SubtreeID); }; diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index 466ab532030c..3f75d108f282 100644 --- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -106,7 +106,7 @@ class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer { Scoreboard RequiredScoreboard; public: - ScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + ScoreboardHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG, const char *ParentDebugType = ""); diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 6a5c2db34bb1..888f9425ff90 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -28,11 +28,12 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" @@ -44,6 +45,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/RecyclingAllocator.h" #include <algorithm> #include <cassert> @@ -71,8 +73,10 @@ class MachineConstantPoolValue; class MCSymbol; class OptimizationRemarkEmitter; class SDDbgValue; +class SDDbgLabel; class SelectionDAG; class 
SelectionDAGTargetInfo; +class TargetLibraryInfo; class TargetLowering; class TargetMachine; class TargetSubtargetInfo; @@ -145,6 +149,7 @@ class SDDbgInfo { BumpPtrAllocator Alloc; SmallVector<SDDbgValue*, 32> DbgValues; SmallVector<SDDbgValue*, 32> ByvalParmDbgValues; + SmallVector<SDDbgLabel*, 4> DbgLabels; using DbgValMapType = DenseMap<const SDNode *, SmallVector<SDDbgValue *, 2>>; DbgValMapType DbgValMap; @@ -161,7 +166,11 @@ public: DbgValMap[Node].push_back(V); } - /// \brief Invalidate all DbgValues attached to the node and remove + void add(SDDbgLabel *L) { + DbgLabels.push_back(L); + } + + /// Invalidate all DbgValues attached to the node and remove /// it from the Node-to-DbgValues map. void erase(const SDNode *Node); @@ -169,13 +178,14 @@ public: DbgValMap.clear(); DbgValues.clear(); ByvalParmDbgValues.clear(); + DbgLabels.clear(); Alloc.Reset(); } BumpPtrAllocator &getAlloc() { return Alloc; } bool empty() const { - return DbgValues.empty() && ByvalParmDbgValues.empty(); + return DbgValues.empty() && ByvalParmDbgValues.empty() && DbgLabels.empty(); } ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) { @@ -186,11 +196,14 @@ public: } using DbgIterator = SmallVectorImpl<SDDbgValue*>::iterator; + using DbgLabelIterator = SmallVectorImpl<SDDbgLabel*>::iterator; DbgIterator DbgBegin() { return DbgValues.begin(); } DbgIterator DbgEnd() { return DbgValues.end(); } DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); } DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); } + DbgLabelIterator DbgLabelBegin() { return DbgLabels.begin(); } + DbgLabelIterator DbgLabelEnd() { return DbgLabels.end(); } }; void checkForCycles(const SelectionDAG *DAG, bool force = false); @@ -210,11 +223,15 @@ class SelectionDAG { const TargetMachine &TM; const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; + const TargetLibraryInfo *LibInfo = nullptr; MachineFunction *MF; Pass *SDAGISelPass = nullptr; LLVMContext *Context; CodeGenOpt::Level OptLevel; + DivergenceAnalysis * DA = nullptr; + FunctionLoweringInfo * FLI = nullptr; + /// The function-level optimization remark emitter. Used to emit remarks /// whenever manipulating the DAG. OptimizationRemarkEmitter *ORE; @@ -248,7 +265,7 @@ class SelectionDAG { /// Pool allocation for misc. objects that are created once per SelectionDAG. BumpPtrAllocator Allocator; - /// Tracks dbg_value information through SDISel. + /// Tracks dbg_value and dbg_label information through SDISel. SDDbgInfo *DbgInfo; uint16_t NextPersistentId = 0; @@ -344,19 +361,7 @@ private: .getRawSubclassData(); } - void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { - assert(!Node->OperandList && "Node already has operands"); - SDUse *Ops = OperandRecycler.allocate( - ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); - - for (unsigned I = 0; I != Vals.size(); ++I) { - Ops[I].setUser(Node); - Ops[I].setInitial(Vals[I]); - } - Node->NumOperands = Vals.size(); - Node->OperandList = Ops; - checkForCycles(Node); - } + void createOperands(SDNode *Node, ArrayRef<SDValue> Vals); void removeOperands(SDNode *Node) { if (!Node->OperandList) @@ -367,7 +372,7 @@ private: Node->NumOperands = 0; Node->OperandList = nullptr; } - + void CreateTopologicalOrder(std::vector<SDNode*>& Order); public: explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); SelectionDAG(const SelectionDAG &) = delete; @@ -376,7 +381,12 @@ public: /// Prepare this SelectionDAG to process code in the given MachineFunction. 
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr); + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence); + + void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) { + FLI = FuncInfo; + } /// Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. @@ -389,6 +399,7 @@ public: const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } + const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } LLVMContext *getContext() const {return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } @@ -460,6 +471,8 @@ public: return Root; } + void VerifyDAGDiverence(); + /// This iterates over the nodes in the SelectionDAG, folding /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and @@ -483,7 +496,7 @@ public: /// the graph. void Legalize(); - /// \brief Transforms a SelectionDAG node and any operands to it into a node + /// Transforms a SelectionDAG node and any operands to it into a node /// that is compatible with the target instruction selector, as indicated by /// the TargetLowering object. /// @@ -534,7 +547,7 @@ public: //===--------------------------------------------------------------------===// // Node creation methods. - /// \brief Create a ConstantSDNode wrapping a constant value. + /// Create a ConstantSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. /// /// If only legal types can be produced, this does the necessary @@ -567,9 +580,13 @@ public: bool isOpaque = false) { return getConstant(Val, DL, VT, true, isOpaque); } + + /// Create a true or false constant of type \p VT using the target's + /// BooleanContent for type \p OpVT. + SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT); /// @} - /// \brief Create a ConstantFPSDNode wrapping a constant value. + /// Create a ConstantFPSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. /// /// If only legal types can be produced, this does the necessary @@ -581,7 +598,7 @@ public: bool isTarget = false); SDValue getConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT, bool isTarget = false); - SDValue getConstantFP(const ConstantFP &CF, const SDLoc &DL, EVT VT, + SDValue getConstantFP(const ConstantFP &V, const SDLoc &DL, EVT VT, bool isTarget = false); SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT) { return getConstantFP(Val, DL, VT, true); @@ -741,7 +758,7 @@ public: return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } - /// \brief Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to + /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to /// the shuffle node in input but with swapped operands. /// /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3> @@ -765,7 +782,7 @@ public: /// Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. 
- SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy); + SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT); /// Return an operation which will any-extend the low lanes of the operand /// into the specified vector type. For example, @@ -793,10 +810,10 @@ public: /// Create a bitwise NOT operation as (XOR Val, -1). SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT); - /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). + /// Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); - /// \brief Create an add instruction with appropriate flags when used for + /// Create an add instruction with appropriate flags when used for /// addressing some offset of an object. i.e. if a load is split into multiple /// components, create an add nuw from the base pointer to the offset. SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) { @@ -862,17 +879,18 @@ public: ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops); // Specialize based on number of operands. SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N, + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDValue N3); + SDValue N2, SDValue N3, + const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, @@ -880,15 +898,15 @@ public: // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. 
- SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); /// Compute a TokenFactor to force all the incoming stack arguments to be @@ -910,6 +928,23 @@ public: SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo); + SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo); + + SDValue getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo); + + SDValue getAtomicMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Value, SDValue Size, + Type *SizeTy, unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo); + /// Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDValue. /// @@ -1050,12 +1085,12 @@ public: MachineMemOperand *MMO); SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, EVT TVT, MachineMemOperand *MMO); - SDValue getIndexedStore(SDValue OrigStoe, const SDLoc &dl, SDValue Base, + SDValue Ptr, EVT SVT, MachineMemOperand *MMO); + SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); /// Returns sum of the base pointer and offset. @@ -1121,28 +1156,31 @@ public: SDValue Op3, SDValue Op4, SDValue Op5); SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops); + // Propagates the change in divergence to users + void updateDivergence(SDNode * N); + /// These are used for target selectors to *mutate* the /// specified node to have the specified return type, Target opcode, and /// operands. Note that target opcodes are stored as /// ~TargetOpcode in the node opcode field. The resultant node is returned. 
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, ArrayRef<SDValue> Ops); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, ArrayRef<SDValue> Ops); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, ArrayRef<SDValue> Ops); SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs, ArrayRef<SDValue> Ops); /// This *mutates* the specified node to have the specified @@ -1197,7 +1235,7 @@ public: SDValue Operand, SDValue Subreg); /// Get the specified node if it's already available, or else return NULL. - SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops, + SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); /// Creates a SDDbgValue node. @@ -1212,8 +1250,16 @@ public: /// Creates a FrameIndex SDDbgValue node. SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, - unsigned FI, const DebugLoc &DL, - unsigned O); + unsigned FI, bool IsIndirect, + const DebugLoc &DL, unsigned O); + + /// Creates a VReg SDDbgValue node. + SDDbgValue *getVRegDbgValue(DIVariable *Var, DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O); + + /// Creates a SDDbgLabel node. + SDDbgLabel *getDbgLabel(DILabel *Label, const DebugLoc &DL, unsigned O); /// Transfer debug values from one node to another, while optionally /// generating fragment expressions for split-up values. If \p InvalidateDbg @@ -1245,7 +1291,7 @@ public: /// to be given new uses. These new uses of From are left in place, and /// not automatically transferred to To. /// - void ReplaceAllUsesWith(SDValue From, SDValue Op); + void ReplaceAllUsesWith(SDValue From, SDValue To); void ReplaceAllUsesWith(SDNode *From, SDNode *To); void ReplaceAllUsesWith(SDNode *From, const SDValue *To); @@ -1296,6 +1342,9 @@ public: /// value is produced by SD. void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter); + /// Add a dbg_label SDNode. + void AddDbgLabel(SDDbgLabel *DB); + /// Get the debug values which reference the given SDNode. 
ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) { return DbgInfo->getSDDbgValues(SD); @@ -1317,6 +1366,13 @@ public: return DbgInfo->ByvalParmDbgEnd(); } + SDDbgInfo::DbgLabelIterator DbgLabelBegin() { + return DbgInfo->DbgLabelBegin(); + } + SDDbgInfo::DbgLabelIterator DbgLabelEnd() { + return DbgInfo->DbgLabelEnd(); + } + /// To be invoked on an SDNode that is slated to be erased. This /// function mirrors \c llvm::salvageDebugInfo. void salvageDebugInfo(SDNode &N); @@ -1431,8 +1487,11 @@ public: /// Test whether the given SDValue is known to never be NaN. bool isKnownNeverNaN(SDValue Op) const; - /// Test whether the given SDValue is known to never be positive or negative - /// zero. + /// Test whether the given floating point SDValue is known to never be + /// positive or negative zero. + bool isKnownNeverZeroFloat(SDValue Op) const; + + /// Test whether the given SDValue is known to contain non-zero value(s). bool isKnownNeverZero(SDValue Op) const; /// Test whether two SDValues are known to compare equal. This diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 18e4c7a83def..580606441a9d 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -56,7 +56,7 @@ public: int64_t &Off); /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); + static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index de6849a1eae1..86df0af7303f 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -110,6 +110,11 @@ public: CodeGenOpt::Level OptLevel, bool IgnoreChains = false); + static void InvalidateNodeId(SDNode *N); + static int getUninvalidatedNodeId(SDNode *N); + + static void EnforceNodeIdInvariant(SDNode *N); + // Opcodes used by the DAG state machine: enum BuiltinOpcodes { OPC_Scope, @@ -199,23 +204,28 @@ protected: /// of the new node T. void ReplaceUses(SDValue F, SDValue T) { CurDAG->ReplaceAllUsesOfValueWith(F, T); + EnforceNodeIdInvariant(T.getNode()); } /// ReplaceUses - replace all uses of the old nodes F with the use /// of the new nodes T. void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) { CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num); + for (unsigned i = 0; i < Num; ++i) + EnforceNodeIdInvariant(T[i].getNode()); } /// ReplaceUses - replace all uses of the old node F with the use /// of the new node T. void ReplaceUses(SDNode *F, SDNode *T) { CurDAG->ReplaceAllUsesWith(F, T); + EnforceNodeIdInvariant(T); } /// Replace all uses of \c F with \c T, then remove \c F from the DAG. void ReplaceNode(SDNode *F, SDNode *T) { CurDAG->ReplaceAllUsesWith(F, T); + EnforceNodeIdInvariant(T); CurDAG->RemoveDeadNode(F); } @@ -270,7 +280,7 @@ public: void SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize); - /// \brief Return true if complex patterns for this target can mutate the + /// Return true if complex patterns for this target can mutate the /// DAG. virtual bool ComplexPatternFuncMutatesDAG() const { return false; @@ -282,14 +292,14 @@ private: // Calls to these functions are generated by tblgen. 
void Select_INLINEASM(SDNode *N); - void Select_READ_REGISTER(SDNode *N); - void Select_WRITE_REGISTER(SDNode *N); + void Select_READ_REGISTER(SDNode *Op); + void Select_WRITE_REGISTER(SDNode *Op); void Select_UNDEF(SDNode *N); void CannotYetSelect(SDNode *N); private: void DoInstructionSelection(); - SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs, + SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, ArrayRef<SDValue> Ops, unsigned EmitNodeInfo); SDNode *MutateStrictFPToFP(SDNode *Node, unsigned NewOpc); @@ -299,10 +309,10 @@ private: /// instruction selected, false if no code should be emitted for it. bool PrepareEHLandingPad(); - /// \brief Perform instruction selection on all basic blocks in the function. + /// Perform instruction selection on all basic blocks in the function. void SelectAllBasicBlocks(const Function &Fn); - /// \brief Perform instruction selection on a single basic block, for + /// Perform instruction selection on a single basic block, for /// instructions between \p Begin and \p End. \p HadTailCall will be set /// to true if a call in the block was translated as a tail call. void SelectBasicBlock(BasicBlock::const_iterator Begin, @@ -312,7 +322,7 @@ private: void CodeGenAndEmitDAG(); - /// \brief Generate instructions for lowering the incoming arguments of the + /// Generate instructions for lowering the incoming arguments of the /// given function. void LowerArguments(const Function &F); diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 522c2f1b2cb2..1af22185d366 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -31,17 +31,18 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cassert> #include <climits> @@ -189,8 +190,10 @@ public: inline bool isUndef() const; inline unsigned getMachineOpcode() const; inline const DebugLoc &getDebugLoc() const; - inline void dump(const SelectionDAG *G = nullptr) const; - inline void dumpr(const SelectionDAG *G = nullptr) const; + inline void dump() const; + inline void dump(const SelectionDAG *G) const; + inline void dumpr() const; + inline void dumpr(const SelectionDAG *G) const; /// Return true if this operand (which must be a chain) reaches the /// specified operand without crossing any side-effecting instructions. @@ -357,21 +360,34 @@ private: bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; - bool UnsafeAlgebra : 1; bool NoNaNs : 1; bool NoInfs : 1; bool NoSignedZeros : 1; bool AllowReciprocal : 1; bool VectorReduction : 1; bool AllowContract : 1; + bool ApproximateFuncs : 1; + bool AllowReassociation : 1; public: /// Default constructor turns off all optimization flags. 
SDNodeFlags() : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), + Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), - AllowContract(false) {} + AllowContract(false), ApproximateFuncs(false), + AllowReassociation(false) {} + + /// Propagate the fast-math-flags from an IR FPMathOperator. + void copyFMF(const FPMathOperator &FPMO) { + setNoNaNs(FPMO.hasNoNaNs()); + setNoInfs(FPMO.hasNoInfs()); + setNoSignedZeros(FPMO.hasNoSignedZeros()); + setAllowReciprocal(FPMO.hasAllowReciprocal()); + setAllowContract(FPMO.hasAllowContract()); + setApproximateFuncs(FPMO.hasApproxFunc()); + setAllowReassociation(FPMO.hasAllowReassoc()); + } /// Sets the state of the flags to the defined state. void setDefined() { AnyDefined = true; } @@ -391,10 +407,6 @@ public: setDefined(); Exact = b; } - void setUnsafeAlgebra(bool b) { - setDefined(); - UnsafeAlgebra = b; - } void setNoNaNs(bool b) { setDefined(); NoNaNs = b; @@ -419,18 +431,32 @@ public: setDefined(); AllowContract = b; } + void setApproximateFuncs(bool b) { + setDefined(); + ApproximateFuncs = b; + } + void setAllowReassociation(bool b) { + setDefined(); + AllowReassociation = b; + } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebra; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } bool hasNoSignedZeros() const { return NoSignedZeros; } bool hasAllowReciprocal() const { return AllowReciprocal; } bool hasVectorReduction() const { return VectorReduction; } bool hasAllowContract() const { return AllowContract; } + bool hasApproximateFuncs() const { return ApproximateFuncs; } + bool hasAllowReassociation() const { return AllowReassociation; } + + bool isFast() const { + return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && + AllowContract && ApproximateFuncs && AllowReassociation; + } /// Clear any flags in this flag set that aren't also set in Flags. /// If the given Flags are undefined then don't do anything. 
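For illustration only (not part of the patch): a minimal sketch of how a lowering routine might use the new copyFMF/isFast API from the hunk above. The locals I, DAG, DL, VT, LHS, and RHS are assumed for the example.

    SDNodeFlags Flags;
    if (const auto *FPOp = dyn_cast<FPMathOperator>(&I))
      Flags.copyFMF(*FPOp); // Mirror the IR-level fast-math flags.
    SDValue Sum = DAG.getNode(ISD::FADD, DL, VT, LHS, RHS, Flags);
    if (Sum->isFast()) {
      // nnan, ninf, nsz, arcp, contract, afn, and reassoc are all set.
    }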
@@ -440,13 +466,14 @@ public: NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; - UnsafeAlgebra &= Flags.UnsafeAlgebra; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; NoSignedZeros &= Flags.NoSignedZeros; AllowReciprocal &= Flags.AllowReciprocal; VectorReduction &= Flags.VectorReduction; AllowContract &= Flags.AllowContract; + ApproximateFuncs &= Flags.ApproximateFuncs; + AllowReassociation &= Flags.AllowReassociation; } }; @@ -466,11 +493,13 @@ protected: friend class SDNode; friend class MemIntrinsicSDNode; friend class MemSDNode; + friend class SelectionDAG; uint16_t HasDebugValue : 1; uint16_t IsMemIntrinsic : 1; + uint16_t IsDivergent : 1; }; - enum { NumSDNodeBits = 2 }; + enum { NumSDNodeBits = 3 }; class ConstantSDNodeBitfields { friend class ConstantSDNode; @@ -540,7 +569,7 @@ protected: static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide"); - static_assert(sizeof(LoadSDNodeBitfields) <= 4, "field too wide"); + static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide"); private: @@ -662,6 +691,8 @@ public: bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } + bool isDivergent() const { return SDNodeBits.IsDivergent; } + /// Return true if there are no uses of this node. bool use_empty() const { return UseList == nullptr; } @@ -796,16 +827,44 @@ public: /// searches to be performed in parallel, caching of results across /// queries and incremental addition to Worklist. Stops early if N is /// found but will resume. Remember to clear Visited and Worklists - /// if DAG changes. + /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before + /// giving up. The TopologicalPrune flag signals that positive NodeIds are + /// topologically ordered (Operands have strictly smaller node id) and search + /// can be pruned leveraging this. static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl<const SDNode *> &Visited, SmallVectorImpl<const SDNode *> &Worklist, - unsigned int MaxSteps = 0) { + unsigned int MaxSteps = 0, + bool TopologicalPrune = false) { + SmallVector<const SDNode *, 8> DeferredNodes; if (Visited.count(N)) return true; + + // Node Ids are assigned in three places: as a topological + // ordering (> 0), during legalization (results in values set to + // 0), and for new nodes (set to -1). If N has a topological id then we + // know that all nodes with ids smaller than it cannot be + // successors and we need not check them. Filter out all nodes + // that can't be matched. We add them to the worklist before exit + // in case of multiple calls. Note that during selection the topological id + // may be violated if a node's predecessor is selected before it. We mark + // this at selection by negating the id of unselected successors and + // restricting topological pruning to positive ids. + + int NId = N->getNodeId(); + // If we invalidated the Id, reconstruct the original NId.
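+    // (Illustrative note, inferred from the reconstruction below: an
+    // invalidated id X is assumed to be stored as -(X + 1), e.g. 5 becomes
+    // -6, so the check recovers X; a plain -1 still means a new node.)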
+ if (NId < -1) + NId = -(NId + 1); + + bool Found = false; while (!Worklist.empty()) { const SDNode *M = Worklist.pop_back_val(); - bool Found = false; + int MId = M->getNodeId(); + if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) && + (MId > 0) && (MId < NId)) { + DeferredNodes.push_back(M); + continue; + } for (const SDValue &OpV : M->op_values()) { SDNode *Op = OpV.getNode(); if (Visited.insert(Op).second) @@ -814,11 +873,16 @@ public: Found = true; } if (Found) - return true; + break; if (MaxSteps != 0 && Visited.size() >= MaxSteps) - return false; + break; } - return false; + // Push deferred nodes back on worklist. + Worklist.append(DeferredNodes.begin(), DeferredNodes.end()); + // If we bailed early, conservatively return found. + if (MaxSteps != 0 && Visited.size() >= MaxSteps) + return true; + return Found; } /// Return true if all the users of N are contained in Nodes. @@ -884,6 +948,7 @@ public: const SDNodeFlags getFlags() const { return Flags; } void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } + bool isFast() { return Flags.isFast(); } /// Clear any flags in this node that aren't also set in Flags. /// If Flags is not in a defined state then this has no effect. @@ -1089,10 +1154,18 @@ inline const DebugLoc &SDValue::getDebugLoc() const { return Node->getDebugLoc(); } +inline void SDValue::dump() const { + return Node->dump(); +} + inline void SDValue::dump(const SelectionDAG *G) const { return Node->dump(G); } +inline void SDValue::dumpr() const { + return Node->dumpr(); +} + inline void SDValue::dumpr(const SelectionDAG *G) const { return Node->dumpr(G); } @@ -1173,7 +1246,7 @@ protected: public: MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemoryVT, MachineMemOperand *MMO); + EVT memvt, MachineMemOperand *MMO); bool readMem() const { return MMO->isLoad(); } bool writeMem() const { return MMO->isStore(); } @@ -1190,7 +1263,8 @@ public: /// encoding of the volatile flag, as well as bits used by subclasses. This /// function should only be used to compute a FoldingSetNodeID value. /// The HasDebugValue bit is masked out because CSE map needs to match - /// nodes with debug info with nodes without debug info. + /// nodes with debug info with nodes without debug info. Same is about + /// isDivergent bit. unsigned getRawSubclassData() const { uint16_t Data; union { @@ -1199,6 +1273,7 @@ public: }; memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits)); SDNodeBits.HasDebugValue = 0; + SDNodeBits.IsDivergent = false; memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits)); return Data; } @@ -1267,6 +1342,7 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || @@ -1318,6 +1394,7 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || @@ -1421,9 +1498,8 @@ class ConstantSDNode : public SDNode { const ConstantInt *Value; - ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, - const DebugLoc &DL, EVT VT) - : SDNode(isTarget ? 
ISD::TargetConstant : ISD::Constant, 0, DL, + ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) + : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), getSDVTList(VT)), Value(val) { ConstantSDNodeBits.IsOpaque = isOpaque; @@ -1459,10 +1535,9 @@ class ConstantFPSDNode : public SDNode { const ConstantFP *Value; - ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL, - EVT VT) - : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, DL, - getSDVTList(VT)), + ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) + : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, + DebugLoc(), getSDVTList(VT)), Value(val) {} public: @@ -1519,10 +1594,10 @@ bool isOneConstant(SDValue V); bool isBitwiseNot(SDValue V); /// Returns the SDNode if it is a constant splat BuildVector or constant int. -ConstantSDNode *isConstOrConstSplat(SDValue V); +ConstantSDNode *isConstOrConstSplat(SDValue N); /// Returns the SDNode if it is a constant splat BuildVector or constant float. -ConstantFPSDNode *isConstOrConstSplatFP(SDValue V); +ConstantFPSDNode *isConstOrConstSplatFP(SDValue N); class GlobalAddressSDNode : public SDNode { friend class SelectionDAG; @@ -1533,7 +1608,7 @@ class GlobalAddressSDNode : public SDNode { GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, - unsigned char TargetFlags); + unsigned char TF); public: const GlobalValue *getGlobal() const { return TheGlobal; } @@ -1714,13 +1789,13 @@ public: unsigned MinSplatBits = 0, bool isBigEndian = false) const; - /// \brief Returns the splatted value or a null value if this is not a splat. + /// Returns the splatted value or a null value if this is not a splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match /// the vector width and set the bits where elements are undef. SDValue getSplatValue(BitVector *UndefElements = nullptr) const; - /// \brief Returns the splatted constant or null if this is not a constant + /// Returns the splatted constant or null if this is not a constant /// splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match @@ -1728,7 +1803,7 @@ public: ConstantSDNode * getConstantSplatNode(BitVector *UndefElements = nullptr) const; - /// \brief Returns the splatted constant FP or null if this is not a constant + /// Returns the splatted constant FP or null if this is not a constant /// FP splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match @@ -1736,7 +1811,7 @@ public: ConstantFPSDNode * getConstantFPSplatNode(BitVector *UndefElements = nullptr) const; - /// \brief If this is a constant FP splat and the splatted constant FP is an + /// If this is a constant FP splat and the splatted constant FP is an /// exact power or 2, return the log base 2 integer value. Otherwise, /// return -1. 
/// @@ -2120,13 +2195,14 @@ public: : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} // In the both nodes address is Op1, mask is Op2: - // MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value - // MaskedScatterSDNode (Chain, value, mask, base, index) + // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) + // MaskedScatterSDNode (Chain, value, mask, base, index, scale) // Mask is a vector of i1 elements const SDValue &getBasePtr() const { return getOperand(3); } const SDValue &getIndex() const { return getOperand(4); } const SDValue &getMask() const { return getOperand(2); } const SDValue &getValue() const { return getOperand(1); } + const SDValue &getScale() const { return getOperand(5); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER || @@ -2329,6 +2405,17 @@ namespace ISD { cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; } + /// Attempt to match a unary predicate against a scalar/splat constant or + /// every element of a constant BUILD_VECTOR. + bool matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match); + + /// Attempt to match a binary predicate against a pair of scalar/splat + /// constants or every element of a pair of constant BUILD_VECTORs. + bool matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match); + } // end namespace ISD } // end namespace llvm diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 3a91e363f923..334267d9828b 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -578,9 +578,9 @@ class raw_ostream; assert(!MI.isInsideBundle() && "Instructions inside bundles should use bundle start's slot."); assert(mi2iMap.find(&MI) == mi2iMap.end() && "Instr already indexed."); - // Numbering DBG_VALUE instructions could cause code generation to be + // Numbering debug instructions could cause code generation to be // affected by debug information. - assert(!MI.isDebugValue() && "Cannot number DBG_VALUE instructions."); + assert(!MI.isDebugInstr() && "Cannot number debug instructions."); assert(MI.getParent() != nullptr && "Instr must be added to function."); @@ -674,10 +674,10 @@ class raw_ostream; idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb)); renumberIndexes(newItr); - std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); } - /// \brief Free the resources that were required to maintain a SlotIndex. + /// Free the resources that were required to maintain a SlotIndex. /// /// Once an index is no longer needed (for instance because the instruction /// at that index has been moved), the resources required to maintain the diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 4407114d2741..3c9850265737 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -29,7 +29,7 @@ class MCStreamer; class raw_ostream; class TargetRegisterInfo; -/// \brief MI-level stackmap operands. +/// MI-level stackmap operands. /// /// MI stackmap operations take the form: /// <id>, <numBytes>, live args... @@ -60,7 +60,7 @@ public: } }; -/// \brief MI-level patchpoint operands. +/// MI-level patchpoint operands. /// /// MI patchpoint operations take the form: /// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 
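For illustration only (not part of the patch): a possible use of the ISD::matchUnaryPredicate helper declared in the SelectionDAGNodes.h hunk above; Divisor is an assumed SDValue.

    // Accept either a scalar constant or a constant BUILD_VECTOR (splat or
    // not) in which every defined element is non-zero.
    bool AllNonZero = ISD::matchUnaryPredicate(
        Divisor, [](ConstantSDNode *C) { return !C->isNullValue(); });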
@@ -137,7 +137,7 @@ public: return getVarIdx(); } - /// \brief Get the next scratch register operand index. + /// Get the next scratch register operand index. unsigned getNextScratchIdx(unsigned StartIdx = 0) const; }; @@ -236,15 +236,15 @@ public: FnInfos.clear(); } - /// \brief Generate a stackmap record for a stackmap instruction. + /// Generate a stackmap record for a stackmap instruction. /// /// MI must be a raw STACKMAP, not a PATCHPOINT. void recordStackMap(const MachineInstr &MI); - /// \brief Generate a stackmap record for a patchpoint instruction. + /// Generate a stackmap record for a patchpoint instruction. void recordPatchPoint(const MachineInstr &MI); - /// \brief Generate a stackmap record for a statepoint instruction. + /// Generate a stackmap record for a statepoint instruction. void recordStatepoint(const MachineInstr &MI); /// If there is any stack map data, create a stack map section and serialize @@ -293,11 +293,11 @@ private: MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const; - /// \brief Create a live-out register record for the given register @p Reg. + /// Create a live-out register record for the given register @p Reg. LiveOutReg createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const; - /// \brief Parse the register live-out mask and return a vector of live-out + /// Parse the register live-out mask and return a vector of live-out /// registers that need to be recorded in the stackmap. LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const; @@ -311,16 +311,16 @@ private: MachineInstr::const_mop_iterator MOE, bool recordResult = false); - /// \brief Emit the stackmap header. + /// Emit the stackmap header. void emitStackmapHeader(MCStreamer &OS); - /// \brief Emit the function frame record for each function. + /// Emit the function frame record for each function. void emitFunctionFrameRecords(MCStreamer &OS); - /// \brief Emit the constant pool. + /// Emit the constant pool. void emitConstantPoolEntries(MCStreamer &OS); - /// \brief Emit the callsite info for each stackmap/patchpoint intrinsic call. + /// Emit the callsite info for each stackmap/patchpoint intrinsic call. void emitCallsiteEntries(MCStreamer &OS); void print(raw_ostream &OS); diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 72de212d0df9..a506ac636a17 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" @@ -35,24 +36,11 @@ class TargetMachine; class Type; class StackProtector : public FunctionPass { -public: - /// SSPLayoutKind. Stack Smashing Protection (SSP) rules require that - /// vulnerable stack allocations are located close the stack protector. - enum SSPLayoutKind { - SSPLK_None, ///< Did not trigger a stack protector. No effect on data - ///< layout. - SSPLK_LargeArray, ///< Array or nested array >= SSP-buffer-size. Closest - ///< to the stack protector. - SSPLK_SmallArray, ///< Array or nested array < SSP-buffer-size. 2nd closest - ///< to the stack protector. - SSPLK_AddrOf ///< The address of this allocation is exposed and - ///< triggered protection. 3rd closest to the protector. - }; - +private: /// A mapping of AllocaInsts to their required SSP layout. 
- using SSPLayoutMap = ValueMap<const AllocaInst *, SSPLayoutKind>; + using SSPLayoutMap = DenseMap<const AllocaInst *, + MachineFrameInfo::SSPLayoutKind>; -private: const TargetMachine *TM = nullptr; /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -70,7 +58,7 @@ private: /// AllocaInst triggers a stack protector. SSPLayoutMap Layout; - /// \brief The minimum size of buffers that will receive stack smashing + /// The minimum size of buffers that will receive stack smashing /// protection when -fstack-protection is used. unsigned SSPBufferSize = 0; @@ -107,7 +95,7 @@ private: bool ContainsProtectableArray(Type *Ty, bool &IsLarge, bool Strong = false, bool InStruct = false) const; - /// \brief Check whether a stack allocation has its address taken. + /// Check whether a stack allocation has its address taken. bool HasAddressTaken(const Instruction *AI); /// RequiresStackProtector - Check whether or not this function needs a @@ -123,14 +111,12 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; - // Return true if StackProtector is supposed to be handled by SelectionDAG. bool shouldEmitSDCheck(const BasicBlock &BB) const; - void adjustForColoring(const AllocaInst *From, const AllocaInst *To); - bool runOnFunction(Function &Fn) override; + + void copyToMachineFrameInfo(MachineFrameInfo &MFI) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h index 8646a15599cb..7d138f585171 100644 --- a/include/llvm/CodeGen/TargetCallingConv.h +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_TARGETCALLINGCONV_H #define LLVM_CODEGEN_TARGETCALLINGCONV_H -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <climits> diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 61f1cf07bcf2..f8effee998e3 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -158,6 +158,10 @@ public: return false; } + /// Returns true if the target can safely skip saving callee-saved registers + /// for noreturn nounwind functions. + virtual bool enableCalleeSaveSkip(const MachineFunction &MF) const; + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF, @@ -341,6 +345,14 @@ public: return false; return true; } + + /// Return initial CFA offset value i.e. the one valid at the beginning of the + /// function (before any stack operations). + virtual int getInitialCFAOffset(const MachineFunction &MF) const; + + /// Return initial CFA register value i.e. the one valid at the beginning of + /// the function (before any stack operations). 
+ virtual unsigned getInitialCFARegister(const MachineFunction &MF) const; }; } // End llvm namespace diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 38a1b33aecad..b5bc561d834c 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -18,12 +18,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" @@ -79,7 +81,7 @@ public: /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const; @@ -225,6 +227,17 @@ public: return 0; } + /// Optional extension of isLoadFromStackSlot that returns the number of + /// bytes loaded from the stack. This must be implemented if a backend + /// supports partial stack slot spills/loads to further disambiguate + /// what the load does. + virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex, + unsigned &MemBytes) const { + MemBytes = 0; + return isLoadFromStackSlot(MI, FrameIndex); + } + /// Check for post-frame ptr elimination stack locations as well. /// This uses a heuristic so it isn't reliable for correctness. virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, @@ -252,6 +265,17 @@ public: return 0; } + /// Optional extension of isStoreToStackSlot that returns the number of + /// bytes stored to the stack. This must be implemented if a backend + /// supports partial stack slot spills/loads to further disambiguate + /// what the store does. + virtual unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex, + unsigned &MemBytes) const { + MemBytes = 0; + return isStoreToStackSlot(MI, FrameIndex); + } + /// Check for post-frame ptr elimination stack locations as well. /// This uses a heuristic, so it isn't reliable for correctness. virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, @@ -325,7 +349,7 @@ public: unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const; - /// \brief Clones instruction or the whole instruction bundle \p Orig and + /// Clones instruction or the whole instruction bundle \p Orig and /// insert into \p MBB before \p InsertBefore. The target may update operands /// that are required to be unique. /// @@ -421,7 +445,8 @@ public: /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of - /// the list is modeled as <Reg:SubReg, SubIdx>. + /// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef + /// flag are not added to this list. 
/// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: /// - %1:sub1, sub0 @@ -446,7 +471,8 @@ public: /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). /// @@ -465,7 +491,8 @@ public: /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). /// @@ -632,8 +659,8 @@ public: return true; } - /// Generate code to reduce the loop iteration by one and check if the loop is - /// finished. Return the value/register of the the new loop count. We need + /// Generate code to reduce the loop iteration by one and check if the loop + /// is finished. Return the value/register of the new loop count. We need /// this function when peeling off one or more iterations of a loop. This /// function assumes the nth iteration is peeled first. virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, @@ -819,6 +846,15 @@ public: llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); } + /// If the specific machine instruction is an instruction that moves/copies + /// a value from one register to another register, return true along with + /// the @Source machine operand and @Destination machine operand. + virtual bool isCopyInstr(const MachineInstr &MI, + const MachineOperand *&SourceOpNum, + const MachineOperand *&Destination) const { + return false; + } + /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill @@ -873,7 +909,7 @@ public: /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, - int FrameIndex, + int FI, LiveIntervals *LIS = nullptr) const; /// Same as the previous version except it allows folding of any load and @@ -925,13 +961,13 @@ public: /// \param InsInstrs - Vector of new instructions that implement P /// \param DelInstrs - Old instructions, including Root, that could be /// replaced by InsInstr - /// \param InstrIdxForVirtReg - map of virtual register to instruction in + /// \param InstIdxForVirtReg - map of virtual register to instruction in /// InsInstr that defines it virtual void genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const; /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to /// reduce critical path length. @@ -950,6 +986,10 @@ public: /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Return true if the given SDNode can be copied during scheduling /// even if it has glue.
+ virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will @@ -976,7 +1016,7 @@ protected: return nullptr; } - /// \brief Target-dependent implementation of getRegSequenceInputs. + /// Target-dependent implementation of getRegSequenceInputs. /// /// \returns true if it is possible to build the equivalent /// REG_SEQUENCE inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -990,7 +1030,7 @@ protected: return false; } - /// \brief Target-dependent implementation of getExtractSubregInputs. + /// Target-dependent implementation of getExtractSubregInputs. /// /// \returns true if it is possible to build the equivalent /// EXTRACT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -1004,7 +1044,7 @@ protected: return false; } - /// \brief Target-dependent implementation of getInsertSubregInputs. + /// Target-dependent implementation of getInsertSubregInputs. /// /// \returns true if it is possible to build the equivalent /// INSERT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -1426,7 +1466,7 @@ public: return 0; } - /// \brief Return the minimum clearance before an instruction that reads an + /// Return the minimum clearance before an instruction that reads an /// unused register. /// /// For example, AVX instructions may copy part of a register operand into @@ -1493,7 +1533,7 @@ public: return false; } - /// \brief Return the value to use for the MachineCSE's LookAheadLimit, + /// Return the value to use for the MachineCSE's LookAheadLimit, /// which is a heuristic used for CSE'ing phys reg defs. virtual unsigned getMachineCSELookAheadLimit() const { // The default lookahead is small to prevent unprofitable quadratic @@ -1562,64 +1602,32 @@ public: return false; } - /// \brief Describes the number of instructions that it will take to call and - /// construct a frame for a given outlining candidate. - struct MachineOutlinerInfo { - /// Number of instructions to call an outlined function for this candidate. - unsigned CallOverhead; - - /// \brief Number of instructions to construct an outlined function frame - /// for this candidate. - unsigned FrameOverhead; - - /// \brief Represents the specific instructions that must be emitted to - /// construct a call to this candidate. - unsigned CallConstructionID; - - /// \brief Represents the specific instructions that must be emitted to - /// construct a frame for this candidate's outlined function. - unsigned FrameConstructionID; - - MachineOutlinerInfo() {} - MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead, - unsigned CallConstructionID, - unsigned FrameConstructionID) - : CallOverhead(CallOverhead), FrameOverhead(FrameOverhead), - CallConstructionID(CallConstructionID), - FrameConstructionID(FrameConstructionID) {} - }; - - /// \brief Returns a \p MachineOutlinerInfo struct containing target-specific + /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. 
- virtual MachineOutlinerInfo getOutlininingCandidateInfo( - std::vector< - std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> - &RepeatedSequenceLocs) const { + virtual outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::getOutliningOverhead!"); + "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } - /// Represents how an instruction should be mapped by the outliner. - /// \p Legal instructions are those which are safe to outline. - /// \p Illegal instructions are those which cannot be outlined. - /// \p Invisible instructions are instructions which can be outlined, but - /// shouldn't actually impact the outlining result. - enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; - /// Returns how or if \p MI should be outlined. - virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { + virtual outliner::InstrType + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningType!"); } - /// Insert a custom epilogue for outlined functions. - /// This may be empty, in which case no epilogue or return statement will be - /// emitted. - virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { + /// Returns target-defined flags defining properties of the MBB for + /// the outliner. + virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { + return 0x0; + } + + /// Insert a custom frame for outlined functions. + virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); + "Target didn't implement TargetInstrInfo::buildOutlinedFrame!"); } /// Insert a call to an outlined function into the program. @@ -1628,20 +1636,11 @@ public: virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { + const outliner::Candidate &C) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } - /// Insert a custom prologue for outlined functions. - /// This may be empty, in which case no prologue will be emitted. - virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { - llvm_unreachable( - "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); - } - /// Return true if the function can safely be outlined from. /// A function \p MF is considered safe for outlining if an outlined function /// produced from instructions in F will produce a program which produces the @@ -1652,13 +1651,18 @@ public: "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); } + /// Return true if the function should be outlined from by default. + virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const { + return false; + } + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; unsigned ReturnOpcode; }; -/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +/// Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. 
template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { using RegInfo = DenseMapInfo<unsigned>; @@ -1672,7 +1676,7 @@ template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { RegInfo::getTombstoneKey()); } - /// \brief Reuse getHashValue implementation from + /// Reuse getHashValue implementation from /// std::pair<unsigned, unsigned>. static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg); diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 380e3b19dc80..d5ff71cf9ac2 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -29,9 +29,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -52,6 +52,7 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -222,7 +223,7 @@ public: virtual ~TargetLoweringBase() = default; protected: - /// \brief Initialize all of the actions to default values. + /// Initialize all of the actions to default values. void initActions(); public: @@ -253,7 +254,8 @@ public: /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; - EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; + EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, + bool LegalTypes = true) const; /// Returns the type to be used for the index operand of: /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, @@ -421,17 +423,17 @@ public: return true; } - /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. + /// Return true if it is cheap to speculate a call to intrinsic cttz. virtual bool isCheapToSpeculateCttz() const { return false; } - /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz. + /// Return true if it is cheap to speculate a call to intrinsic ctlz. virtual bool isCheapToSpeculateCtlz() const { return false; } - /// \brief Return true if ctlz instruction is fast. + /// Return true if ctlz instruction is fast. virtual bool isCtlzFast() const { return false; } @@ -444,13 +446,13 @@ public: return false; } - /// \brief Return true if it is cheaper to split the store of a merged int val + /// Return true if it is cheaper to split the store of a merged int val /// from a pair of smaller values into multiple stores. virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { return false; } - /// \brief Return if the target supports combining a + /// Return if the target supports combining a /// chain like: /// \code /// %andResult = and %val1, #mask @@ -507,7 +509,30 @@ public: return hasAndNotCompare(X); } - /// \brief Return true if the target wants to use the optimization that + /// There are two ways to clear extreme bits (either low or high): + /// Mask: x & (-1 << y) (the instcombine canonical form) + /// Shifts: x >> y << y + /// Return true if the variant with 2 shifts is preferred. + /// Return false if there is no preference. 
+ virtual bool preferShiftsToClearExtremeBits(SDValue X) const { + // By default, let's assume that no one prefers shifts. + return false; + } + + /// Should we transform the IR-optimal check for whether the given truncation + /// down into KeptBits would be truncating or not: + /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) + /// Into its more traditional form: + /// ((%x << C) a>> C) dstcond %x + /// Return true if we should transform. + /// Return false if there is no preference. + virtual bool shouldTransformSignedTruncationCheck(EVT XVT, + unsigned KeptBits) const { + // By default, let's assume that no one prefers shifts. + return false; + } + + /// Return true if the target wants to use the optimization that /// turns ext(promotableInst1(...(promotableInstN(load)))) into /// promotedInst1(...(promotedInstN(ext(load)))). bool enableExtLdPromotion() const { return EnableExtLdPromotion; } @@ -746,10 +771,10 @@ public: /// operations don't trap except for integer divide and remainder. virtual bool canOpTrap(unsigned Op, EVT VT) const; - /// Similar to isShuffleMaskLegal. This is used by Targets can use this to - /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to replace - /// a VAND with a constant pool entry. + /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there + /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a + /// constant pool entry. virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { return false; } @@ -765,6 +790,39 @@ public: return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } + LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { + unsigned EqOpc; + switch (Op) { + default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; + case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; + case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + } + + auto Action = getOperationAction(EqOpc, VT); + + // We don't currently handle Custom or Promote for strict FP pseudo-ops. + // For now, we just expand for those cases. + if (Action != Legal) + Action = Expand; + + return Action; + } + /// Return true if the specified operation is legal on this target or can be /// made legal with custom lowering. This is used to help guide high-level /// lowering decisions. @@ -800,7 +858,7 @@ public: } /// Return true if lowering to a jump table is allowed.
- bool areJTsAllowed(const Function *Fn) const { + virtual bool areJTsAllowed(const Function *Fn) const { if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; @@ -812,7 +870,7 @@ public: bool rangeFitsInWord(const APInt &Low, const APInt &High, const DataLayout &DL) const { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DL.getPointerSizeInBits(); + uint64_t BW = DL.getIndexSizeInBits(0u); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -820,7 +878,7 @@ public: /// Return true if lowering to a jump table is suitable for a set of case /// clusters which may contain \p NumCases cases, \p Range range of values. /// FIXME: This function checks the maximum table size and density, but the - /// minimum size is not checked. It would be nice if the the minimum size is + /// minimum size is not checked. It would be nice if the minimum size were /// also checked within this function. Currently, the minimum size check is /// performed in findJumpTable() in SelectionDAGBuilder and /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. @@ -986,9 +1044,14 @@ public: /// Return true if the specified condition code is legal on this target. bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { - return - getCondCodeAction(CC, VT) == Legal || - getCondCodeAction(CC, VT) == Custom; + return getCondCodeAction(CC, VT) == Legal; + } + + /// Return true if the specified condition code is legal or custom on this + /// target. + bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { + return getCondCodeAction(CC, VT) == Legal || + getCondCodeAction(CC, VT) == Custom; } /// If the action for this operation is to promote, this method returns the @@ -1110,10 +1173,6 @@ public: /// Certain combinations of ABIs, Targets and features require that types /// are legal for some operations and not for other operations. /// For MIPS all vector types must be passed through the integer register set. - virtual MVT getRegisterTypeForCallingConv(MVT VT) const { - return getRegisterType(VT); - } - virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, EVT VT) const { return getRegisterType(Context, VT); @@ -1172,7 +1231,7 @@ public: return getPointerTy(DL).getSizeInBits(); } - /// \brief Get maximum # of store operations permitted for llvm.memset + /// Get maximum # of store operations permitted for llvm.memset /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memset. The value is set by the target at the @@ -1182,7 +1241,7 @@ public: return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; } - /// \brief Get maximum # of store operations permitted for llvm.memcpy + /// Get maximum # of store operations permitted for llvm.memcpy /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memcpy. The value is set by the target at the @@ -1192,6 +1251,15 @@ public: return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; } + /// Get maximum # of store operations to be glued together + /// + /// This function returns the maximum number of store operations permitted + /// to glue together during lowering of llvm.memcpy. The value is set by + /// the target at the performance threshold for such a replacement.
+ virtual unsigned getMaxGluedStoresPerMemcpy() const { + return MaxGluedStoresPerMemcpy; + } + /// Get maximum # of load operations permitted for memcmp /// /// This function returns the maximum number of load operations permitted @@ -1202,7 +1270,19 @@ public: return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; } - /// \brief Get maximum # of store operations permitted for llvm.memmove + /// For memcmp expansion when the memcmp result is only compared equal or + /// not-equal to 0, allow up to this number of load pairs per block. As an + /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block: + /// a0 = load2bytes &a[0] + /// b0 = load2bytes &b[0] + /// a2 = load1byte &a[2] + /// b2 = load1byte &b[2] + /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 + virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { + return 1; + } + + /// Get maximum # of store operations permitted for llvm.memmove /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memmove. The value is set by the target at the @@ -1212,7 +1292,7 @@ public: return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; } - /// \brief Determine if the target supports unaligned memory accesses. + /// Determine if the target supports unaligned memory accesses. /// /// This function returns true if the target allows unaligned memory accesses /// of the specified type in the given address space. If true, it also returns @@ -1350,7 +1430,7 @@ public: /// If the target has a standard location for the stack protector guard, /// returns the address of that location. Otherwise, returns nullptr. /// DEPRECATED: please override useLoadStackGuardNode and customize - /// LOAD_STACK_GUARD, or customize @llvm.stackguard(). + /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; /// Inserts necessary declarations for SSP (stack protection) purpose. @@ -1905,7 +1985,7 @@ public: Type *Ty, unsigned AddrSpace, Instruction *I = nullptr) const; - /// \brief Return the cost of the scaling factor used in the addressing mode + /// Return the cost of the scaling factor used in the addressing mode /// represented by AM for this target, for a load/store of the specified type. /// /// If the AM is supported, the return value must be >= 0. @@ -2098,11 +2178,14 @@ public: return false; } - /// \brief Get the maximum supported factor for interleaved memory accesses. + /// Return true if the target has a vector blend instruction. + virtual bool hasVectorBlend() const { return false; } + + /// Get the maximum supported factor for interleaved memory accesses. /// Default to be the minimum interleave factor: 2. virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } - /// \brief Lower an interleaved load to target specific intrinsics. Return + /// Lower an interleaved load to target specific intrinsics. Return /// true on success. /// /// \p LI is the vector load instruction. @@ -2116,7 +2199,7 @@ public: return false; } - /// \brief Lower an interleaved store to target specific intrinsics. Return + /// Lower an interleaved store to target specific intrinsics. Return /// true on success. /// /// \p SI is the vector store instruction. @@ -2189,7 +2272,7 @@ public: return false; } - /// \brief Return true if it is beneficial to convert a load of a constant to + /// Return true if it is beneficial to convert a load of a constant to /// just the constant itself. 
/// On some targets it might be more efficient to use a combination of /// arithmetic instructions to materialize the constant instead of loading it @@ -2214,6 +2297,11 @@ public: return false; } + /// Return true if CodeGenPrepare should consider splitting a large offset of + /// a GEP so that the GEP fits into the addressing mode and can be sunk into + /// the same blocks as its users. + virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -2453,7 +2541,7 @@ protected: /// expected to be merged. unsigned GatherAllAliasesMaxDepth; - /// \brief Specify maximum number of store instructions per memset call. + /// Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -2469,7 +2557,7 @@ protected: /// to memset, used for functions with OptSize attribute. unsigned MaxStoresPerMemsetOptSize; - /// \brief Specify maximum bytes of store instructions per memcpy call. + /// Specify maximum bytes of store instructions per memcpy call. /// /// When lowering \@llvm.memcpy this field specifies the maximum number of /// store operations that may be substituted for a call to memcpy. Targets @@ -2482,13 +2570,21 @@ protected: /// constant size. unsigned MaxStoresPerMemcpy; + + /// Specify the maximum number of store instructions to glue in inlined memcpy. + /// + /// When memcpy is inlined based on MaxStoresPerMemcpy, specify the maximum + /// number of store instructions to keep together. This helps in pairing and + /// vectorization later on. + unsigned MaxGluedStoresPerMemcpy = 0; + /// Maximum number of store operations that may be substituted for a call to /// memcpy, used for functions with OptSize attribute. unsigned MaxStoresPerMemcpyOptSize; unsigned MaxLoadsPerMemcmp; unsigned MaxLoadsPerMemcmpOptSize; - /// \brief Specify maximum bytes of store instructions per memmove call. + /// Specify maximum bytes of store instructions per memmove call. /// /// When lowering \@llvm.memmove this field specifies the maximum number of /// store instructions that may be substituted for a call to memmove. Targets @@ -2520,6 +2616,16 @@ protected: /// sequence of memory operands that is recognized by PrologEpilogInserter. MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; + + /// Replace/modify the XRay custom event operands with target-dependent + /// details. + MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const; + + /// Replace/modify the XRay typed event operands with target-dependent + /// details. + MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const; }; /// This class defines information used to lower LLVM code to legal SelectionDAG @@ -2539,6 +2645,16 @@ public: bool isPositionIndependent() const; + virtual bool isSDNodeSourceOfDivergence(const SDNode *N, + FunctionLoweringInfo *FLI, + DivergenceAnalysis *DA) const { + return false; + } + + virtual bool isSDNodeAlwaysUniform(const SDNode *N) const { + return false; + } + /// Returns true by value, base pointer and offset pointer and addressing mode /// by reference if the node's address can be legally represented as /// pre-indexed load / store address.
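The new TargetLowering hooks above all default to conservative answers, so a backend opts in piecemeal. A minimal sketch of what adopting a few of them could look like for a hypothetical backend (the class name and the chosen threshold values are illustrative assumptions, not part of this patch):

#include "llvm/CodeGen/TargetLowering.h"

namespace {
// Hypothetical backend opting into some of the newly added hooks.
class MyTargetLowering : public llvm::TargetLowering {
public:
  explicit MyTargetLowering(const llvm::TargetMachine &TM)
      : llvm::TargetLowering(TM) {}

  // Let CodeGenPrepare split large GEP offsets so the GEP can be sunk into
  // the blocks of its users (hook added above; defaults to false).
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  // Keep up to four glued stores together when inlining memcpy so that
  // later passes can pair or vectorize them (default is
  // MaxGluedStoresPerMemcpy, i.e. 0).
  unsigned getMaxGluedStoresPerMemcpy() const override { return 4; }

  // Allow two load pairs per block when expanding memcmp()==0
  // (default is 1).
  unsigned getMemcmpEqZeroLoadsPerBlock() const override { return 2; }
};
} // end anonymous namespace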
@@ -2690,6 +2806,30 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; + /// Look at Vector Op. At this point, we know that only the DemandedElts + /// elements of the result of Op are ever used downstream. If we can use + /// this information to simplify Op, create a new simplified DAG node and + /// return true, storing the original and new nodes in TLO. + /// Otherwise, analyze the expression and return a mask of KnownUndef and + /// KnownZero elements for the expression (used to simplify the caller). + /// The KnownUndef/Zero elements may only be accurate for those elements + /// in the \p DemandedEltMask. + /// \p AssumeSingleUse When this parameter is true, this function will + /// attempt to simplify \p Op even if there are multiple uses. + /// Callers are responsible for correctly updating the DAG based on the + /// results of this function, because simply replacing TLO.Old + /// with TLO.New will be incorrect when this parameter is true and TLO.Old + /// has multiple uses. + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, + APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth = 0, + bool AssumeSingleUse = false) const; + + /// Helper wrapper around SimplifyDemandedVectorElts. + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, + APInt &KnownUndef, APInt &KnownZero, + DAGCombinerInfo &DCI) const; + /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts /// argument allows us to only collect the known bits that are shared by the @@ -2718,6 +2858,15 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const; + /// Attempt to simplify any target nodes based on the demanded vector + /// elements, returning true on success. Otherwise, analyze the expression and + /// return a mask of KnownUndef and KnownZero elements for the expression + /// (used to simplify the caller). The KnownUndef/Zero elements may only be + /// accurate for those elements in \p DemandedElts. + virtual bool SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; @@ -2731,7 +2880,7 @@ public: bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } - bool isAfterLegalizeVectorOps() const { + bool isAfterLegalizeDAG() const { return Level == AfterLegalizeDAG; } CombineLevel getDAGCombineLevel() { return Level; } @@ -2753,12 +2902,8 @@ public: /// from getBooleanContents(). bool isConstFalseVal(const SDNode *N) const; - /// Return a constant of type VT that contains a true value that respects - /// getBooleanContents() - SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const; - /// Return if \p N is a True value when extended to \p VT. - bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const; + bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. @@ -3479,7 +3624,7 @@ public: /// bounds the returned pointer is unspecified, but will be within the vector /// bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, - SDValue Idx) const; + SDValue Index) const; //===--------------------------------------------------------------------===// // Instruction Emitting Hooks @@ -3518,6 +3663,13 @@ public: virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; + /// Expands a target-specific indirect branch for the case of JumpTable + /// expansion. + virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, + SelectionDAG &DAG) const { + return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); + } + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) // If we're comparing for equality to zero and isCtlzFast is true, expose the // fact that this can be implemented as a ctlz/srl pair, so that the dag @@ -3528,6 +3680,11 @@ private: SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/CodeGen/TargetLoweringObjectFile.h b/include/llvm/CodeGen/TargetLoweringObjectFile.h deleted file mode 100644 index fe77c2954129..000000000000 --- a/include/llvm/CodeGen/TargetLoweringObjectFile.h +++ /dev/null @@ -1,194 +0,0 @@ -//===-- llvm/CodeGen/TargetLoweringObjectFile.h - Object Info ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements classes used to handle lowerings specific to common -// object file formats. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H -#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/SectionKind.h" -#include <cstdint> - -namespace llvm { - -class GlobalValue; -class MachineModuleInfo; -class Mangler; -class MCContext; -class MCExpr; -class MCSection; -class MCSymbol; -class MCSymbolRefExpr; -class MCStreamer; -class MCValue; -class TargetMachine; - -class TargetLoweringObjectFile : public MCObjectFileInfo { - MCContext *Ctx = nullptr; - - /// Name-mangler for global names. - Mangler *Mang = nullptr; - -protected: - bool SupportIndirectSymViaGOTPCRel = false; - bool SupportGOTPCRelWithOffset = true; - - /// This section contains the static constructor pointer list. - MCSection *StaticCtorSection = nullptr; - - /// This section contains the static destructor pointer list. - MCSection *StaticDtorSection = nullptr; - -public: - TargetLoweringObjectFile() = default; - TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete; - TargetLoweringObjectFile & - operator=(const TargetLoweringObjectFile &) = delete; - virtual ~TargetLoweringObjectFile(); - - MCContext &getContext() const { return *Ctx; } - Mangler &getMangler() const { return *Mang; } - - /// This method must be called before any actual lowering is done.
This - /// specifies the current context for codegen, and gives the lowering - /// implementations a chance to set up their default sections. - virtual void Initialize(MCContext &ctx, const TargetMachine &TM); - - virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, - const MCSymbol *Sym) const; - - /// Emit the module-level metadata that the platform cares about. - virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const {} - - /// Given a constant with the SectionKind, return a section that it should be - /// placed in. - virtual MCSection *getSectionForConstant(const DataLayout &DL, - SectionKind Kind, - const Constant *C, - unsigned &Align) const; - - /// Classify the specified global variable into a set of target independent - /// categories embodied in SectionKind. - static SectionKind getKindForGlobal(const GlobalObject *GO, - const TargetMachine &TM); - - /// This method computes the appropriate section to emit the specified global - /// variable or function definition. This should not be passed external (or - /// available externally) globals. - MCSection *SectionForGlobal(const GlobalObject *GO, SectionKind Kind, - const TargetMachine &TM) const; - - /// This method computes the appropriate section to emit the specified global - /// variable or function definition. This should not be passed external (or - /// available externally) globals. - MCSection *SectionForGlobal(const GlobalObject *GO, - const TargetMachine &TM) const { - return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM); - } - - virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName, - const GlobalValue *GV, - const TargetMachine &TM) const; - - virtual MCSection *getSectionForJumpTable(const Function &F, - const TargetMachine &TM) const; - - virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, - const Function &F) const; - - /// Targets should implement this method to assign a section to globals with - /// an explicit section specfied. The implementation of this method can - /// assume that GO->hasSection() is true. - virtual MCSection * - getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, - const TargetMachine &TM) const = 0; - - /// Return an MCExpr to use for a reference to the specified global variable - /// from exception handling information. - virtual const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, - unsigned Encoding, - const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const; - - /// Return the MCSymbol for a private symbol with global value name as its - /// base, with the specified suffix. - MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - const TargetMachine &TM) const; - - // The symbol that gets passed to .cfi_personality. - virtual MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, - const TargetMachine &TM, - MachineModuleInfo *MMI) const; - - const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, - MCStreamer &Streamer) const; - - virtual MCSection *getStaticCtorSection(unsigned Priority, - const MCSymbol *KeySym) const { - return StaticCtorSection; - } - - virtual MCSection *getStaticDtorSection(unsigned Priority, - const MCSymbol *KeySym) const { - return StaticDtorSection; - } - - /// \brief Create a symbol reference to describe the given TLS variable when - /// emitting the address in debug info. 
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; - - virtual const MCExpr *lowerRelativeReference(const GlobalValue *LHS, - const GlobalValue *RHS, - const TargetMachine &TM) const { - return nullptr; - } - - /// \brief Target supports replacing a data "PC"-relative access to a symbol - /// through another symbol, by accessing the later via a GOT entry instead? - bool supportIndirectSymViaGOTPCRel() const { - return SupportIndirectSymViaGOTPCRel; - } - - /// \brief Target GOT "PC"-relative relocation supports encoding an additional - /// binary expression with an offset? - bool supportGOTPCRelWithOffset() const { - return SupportGOTPCRelWithOffset; - } - - /// \brief Get the target specific PC relative GOT entry relocation - virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, - const MCValue &MV, - int64_t Offset, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const { - return nullptr; - } - - virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, - const GlobalValue *GV) const {} - -protected: - virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, - SectionKind Kind, - const TargetMachine &TM) const = 0; -}; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 69de9f8cb35d..f5c7fc824ab4 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -36,16 +36,18 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; + const TargetMachine *TM; public: TargetLoweringObjectFileELF() = default; ~TargetLoweringObjectFileELF() override = default; + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + /// Emit Obj-C garbage collection and linker options. - void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; - void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const override; /// Given a constant with the SectionKind, return a section that it should be @@ -98,8 +100,7 @@ public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; /// Emit the module flags that specify the garbage collection information. - void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; @@ -153,8 +154,7 @@ public: const TargetMachine &TM) const override; /// Emit Obj-C garbage collection and linker options. 
- void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; MCSection *getStaticCtorSection(unsigned Priority, const MCSymbol *KeySym) const override; @@ -163,6 +163,19 @@ public: void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV) const override; + + void emitLinkerFlagsForUsed(raw_ostream &OS, + const GlobalValue *GV) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const override; + + /// Given a mergeable constant with the specified size and relocation + /// information, return a section that it should be placed in. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, + unsigned &Align) const override; }; class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile { diff --git a/include/llvm/CodeGen/TargetOpcodes.def b/include/llvm/CodeGen/TargetOpcodes.def deleted file mode 100644 index d3e8483798a7..000000000000 --- a/include/llvm/CodeGen/TargetOpcodes.def +++ /dev/null @@ -1,461 +0,0 @@ -//===-- llvm/CodeGen/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the target independent instruction opcodes. -// -//===----------------------------------------------------------------------===// - -// NOTE: NO INCLUDE GUARD DESIRED! - -/// HANDLE_TARGET_OPCODE defines an opcode and its associated enum value. -/// -#ifndef HANDLE_TARGET_OPCODE -#define HANDLE_TARGET_OPCODE(OPC, NUM) -#endif - -/// HANDLE_TARGET_OPCODE_MARKER defines an alternative identifier for an opcode. -/// -#ifndef HANDLE_TARGET_OPCODE_MARKER -#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) -#endif - -/// Every instruction defined here must also appear in Target.td. -/// -HANDLE_TARGET_OPCODE(PHI) -HANDLE_TARGET_OPCODE(INLINEASM) -HANDLE_TARGET_OPCODE(CFI_INSTRUCTION) -HANDLE_TARGET_OPCODE(EH_LABEL) -HANDLE_TARGET_OPCODE(GC_LABEL) -HANDLE_TARGET_OPCODE(ANNOTATION_LABEL) - -/// KILL - This instruction is a noop that is used only to adjust the -/// liveness of registers. This can be useful when dealing with -/// sub-registers. -HANDLE_TARGET_OPCODE(KILL) - -/// EXTRACT_SUBREG - This instruction takes two operands: a register -/// that has subregisters, and a subregister index. It returns the -/// extracted subregister value. This is commonly used to implement -/// truncation operations on target architectures which support it. -HANDLE_TARGET_OPCODE(EXTRACT_SUBREG) - -/// INSERT_SUBREG - This instruction takes three operands: a register that -/// has subregisters, a register providing an insert value, and a -/// subregister index. It returns the value of the first register with the -/// value of the second register inserted. The first register is often -/// defined by an IMPLICIT_DEF, because it is commonly used to implement -/// anyext operations on target architectures which support it. -HANDLE_TARGET_OPCODE(INSERT_SUBREG) - -/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. -HANDLE_TARGET_OPCODE(IMPLICIT_DEF) - -/// SUBREG_TO_REG - Assert the value of bits in a super register. 
-/// The result of this instruction is the value of the second operand inserted -/// into the subregister specified by the third operand. All other bits are -/// assumed to be equal to the bits in the immediate integer constant in the -/// first operand. This instruction just communicates information; No code -/// should be generated. -/// This is typically used after an instruction where the write to a subregister -/// implicitly cleared the bits in the super registers. -HANDLE_TARGET_OPCODE(SUBREG_TO_REG) - -/// COPY_TO_REGCLASS - This instruction is a placeholder for a plain -/// register-to-register copy into a specific register class. This is only -/// used between instruction selection and MachineInstr creation, before -/// virtual registers have been created for all the instructions, and it's -/// only needed in cases where the register classes implied by the -/// instructions are insufficient. It is emitted as a COPY MachineInstr. - HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) - -/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic -HANDLE_TARGET_OPCODE(DBG_VALUE) - -/// REG_SEQUENCE - This variadic instruction is used to form a register that -/// represents a consecutive sequence of sub-registers. It's used as a -/// register coalescing / allocation aid and must be eliminated before code -/// emission. -// In SDNode form, the first operand encodes the register class created by -// the REG_SEQUENCE, while each subsequent pair names a vreg + subreg index -// pair. Once it has been lowered to a MachineInstr, the regclass operand -// is no longer present. -/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 -/// After register coalescing references of v1024 should be replace with -/// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) - -/// COPY - Target-independent register copy. This instruction can also be -/// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) - -/// BUNDLE - This instruction represents an instruction bundle. Instructions -/// which immediately follow a BUNDLE instruction which are marked with -/// 'InsideBundle' flag are inside the bundle. -HANDLE_TARGET_OPCODE(BUNDLE) - -/// Lifetime markers. -HANDLE_TARGET_OPCODE(LIFETIME_START) -HANDLE_TARGET_OPCODE(LIFETIME_END) - -/// A Stackmap instruction captures the location of live variables at its -/// position in the instruction stream. It is followed by a shadow of bytes -/// that must lie within the function and not contain another stackmap. -HANDLE_TARGET_OPCODE(STACKMAP) - -/// FEntry all - This is a marker instruction which gets translated into a raw fentry call. -HANDLE_TARGET_OPCODE(FENTRY_CALL) - -/// Patchable call instruction - this instruction represents a call to a -/// constant address, followed by a series of NOPs. It is intended to -/// support optimizations for dynamic languages (such as javascript) that -/// rewrite calls to runtimes with more efficient code sequences. -/// This also implies a stack map. -HANDLE_TARGET_OPCODE(PATCHPOINT) - -/// This pseudo-instruction loads the stack guard value. Targets which need -/// to prevent the stack guard value or address from being spilled to the -/// stack should override TargetLowering::emitLoadStackGuardNode and -/// additionally expand this pseudo after register allocation. -HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) - -/// Call instruction with associated vm state for deoptimization and list -/// of live pointers for relocation by the garbage collector. 
It is -/// intended to support garbage collection with fully precise relocating -/// collectors and deoptimizations in either the callee or caller. -HANDLE_TARGET_OPCODE(STATEPOINT) - -/// Instruction that records the offset of a local stack allocation passed to -/// llvm.localescape. It has two arguments: the symbol for the label and the -/// frame index of the local stack allocation. -HANDLE_TARGET_OPCODE(LOCAL_ESCAPE) - -/// Wraps a machine instruction which can fault, bundled with associated -/// information on how to handle such a fault. -/// For example loading instruction that may page fault, bundled with associated -/// information on how to handle such a page fault. It is intended to support -/// "zero cost" null checks in managed languages by allowing LLVM to fold -/// comparisons into existing memory operations. -HANDLE_TARGET_OPCODE(FAULTING_OP) - -/// Wraps a machine instruction to add patchability constraints. An -/// instruction wrapped in PATCHABLE_OP has to either have a minimum -/// size or be preceded with a nop of that size. The first operand is -/// an immediate denoting the minimum size of the instruction, the -/// second operand is an immediate denoting the opcode of the original -/// instruction. The rest of the operands are the operands of the -/// original instruction. -HANDLE_TARGET_OPCODE(PATCHABLE_OP) - -/// This is a marker instruction which gets translated into a nop sled, useful -/// for inserting instrumentation instructions at runtime. -HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER) - -/// Wraps a return instruction and its operands to enable adding nop sleds -/// either before or after the return. The nop sleds are useful for inserting -/// instrumentation instructions at runtime. -/// The patch here replaces the return instruction. -HANDLE_TARGET_OPCODE(PATCHABLE_RET) - -/// This is a marker instruction which gets translated into a nop sled, useful -/// for inserting instrumentation instructions at runtime. -/// The patch here prepends the return instruction. -/// The same thing as in x86_64 is not possible for ARM because it has multiple -/// return instructions. Furthermore, CPU allows parametrized and even -/// conditional return instructions. In the current ARM implementation we are -/// making use of the fact that currently LLVM doesn't seem to generate -/// conditional return instructions. -/// On ARM, the same instruction can be used for popping multiple registers -/// from the stack and returning (it just pops pc register too), and LLVM -/// generates it sometimes. So we can't insert the sled between this stack -/// adjustment and the return without splitting the original instruction into 2 -/// instructions. So on ARM, rather than jumping into the exit trampoline, we -/// call it, it does the tracing, preserves the stack and returns. -HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) - -/// Wraps a tail call instruction and its operands to enable adding nop sleds -/// either before or after the tail exit. We use this as a disambiguation from -/// PATCHABLE_RET which specifically only works for return instructions. -HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) - -/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be -/// patched to insert instrumentation instructions. -HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) - -/// The following generic opcodes are not supposed to appear after ISel. -/// This is something we might want to relax, but for now, this is convenient -/// to produce diagnostics. - -/// Generic ADD instruction. 
This is an integer add. -HANDLE_TARGET_OPCODE(G_ADD) -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) - -/// Generic SUB instruction. This is an integer sub. -HANDLE_TARGET_OPCODE(G_SUB) - -// Generic multiply instruction. -HANDLE_TARGET_OPCODE(G_MUL) - -// Generic signed division instruction. -HANDLE_TARGET_OPCODE(G_SDIV) - -// Generic unsigned division instruction. -HANDLE_TARGET_OPCODE(G_UDIV) - -// Generic signed remainder instruction. -HANDLE_TARGET_OPCODE(G_SREM) - -// Generic unsigned remainder instruction. -HANDLE_TARGET_OPCODE(G_UREM) - -/// Generic bitwise and instruction. -HANDLE_TARGET_OPCODE(G_AND) - -/// Generic bitwise or instruction. -HANDLE_TARGET_OPCODE(G_OR) - -/// Generic bitwise exclusive-or instruction. -HANDLE_TARGET_OPCODE(G_XOR) - - -HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF) - -/// Generic PHI instruction with types. -HANDLE_TARGET_OPCODE(G_PHI) - -/// Generic instruction to materialize the address of an alloca or other -/// stack-based object. -HANDLE_TARGET_OPCODE(G_FRAME_INDEX) - -/// Generic reference to global value. -HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE) - -/// Generic instruction to extract blocks of bits from the register given -/// (typically a sub-register COPY after instruction selection). -HANDLE_TARGET_OPCODE(G_EXTRACT) - -HANDLE_TARGET_OPCODE(G_UNMERGE_VALUES) - -/// Generic instruction to insert blocks of bits from the registers given into -/// the source. -HANDLE_TARGET_OPCODE(G_INSERT) - -/// Generic instruction to paste a variable number of components together into a -/// larger register. -HANDLE_TARGET_OPCODE(G_MERGE_VALUES) - -/// Generic pointer to int conversion. -HANDLE_TARGET_OPCODE(G_PTRTOINT) - -/// Generic int to pointer conversion. -HANDLE_TARGET_OPCODE(G_INTTOPTR) - -/// Generic bitcast. The source and destination types must be different, or a -/// COPY is the relevant instruction. -HANDLE_TARGET_OPCODE(G_BITCAST) - -/// Generic load. -HANDLE_TARGET_OPCODE(G_LOAD) - -/// Generic store. -HANDLE_TARGET_OPCODE(G_STORE) - -/// Generic atomic cmpxchg with internal success check. -HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) - -/// Generic atomic cmpxchg. -HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG) - -/// Generic atomicrmw. -HANDLE_TARGET_OPCODE(G_ATOMICRMW_XCHG) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_ADD) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_SUB) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_AND) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_NAND) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_OR) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_XOR) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) - -/// Generic conditional branch instruction. -HANDLE_TARGET_OPCODE(G_BRCOND) - -/// Generic indirect branch instruction. -HANDLE_TARGET_OPCODE(G_BRINDIRECT) - -/// Generic intrinsic use (without side effects). -HANDLE_TARGET_OPCODE(G_INTRINSIC) - -/// Generic intrinsic use (with side effects). -HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS) - -/// Generic extension allowing rubbish in high bits. -HANDLE_TARGET_OPCODE(G_ANYEXT) - -/// Generic instruction to discard the high bits of a register. This differs -/// from (G_EXTRACT val, 0) on its action on vectors: G_TRUNC will truncate -/// each element individually, G_EXTRACT will typically discard the high -/// elements of the vector. -HANDLE_TARGET_OPCODE(G_TRUNC) - -/// Generic integer constant. -HANDLE_TARGET_OPCODE(G_CONSTANT) - -/// Generic floating constant. 
-HANDLE_TARGET_OPCODE(G_FCONSTANT) - -/// Generic va_start instruction. Stores to its one pointer operand. -HANDLE_TARGET_OPCODE(G_VASTART) - -/// Generic va_start instruction. Stores to its one pointer operand. -HANDLE_TARGET_OPCODE(G_VAARG) - -// Generic sign extend -HANDLE_TARGET_OPCODE(G_SEXT) - -// Generic zero extend -HANDLE_TARGET_OPCODE(G_ZEXT) - -// Generic left-shift -HANDLE_TARGET_OPCODE(G_SHL) - -// Generic logical right-shift -HANDLE_TARGET_OPCODE(G_LSHR) - -// Generic arithmetic right-shift -HANDLE_TARGET_OPCODE(G_ASHR) - -/// Generic integer-base comparison, also applicable to vectors of integers. -HANDLE_TARGET_OPCODE(G_ICMP) - -/// Generic floating-point comparison, also applicable to vectors. -HANDLE_TARGET_OPCODE(G_FCMP) - -/// Generic select. -HANDLE_TARGET_OPCODE(G_SELECT) - -/// Generic unsigned add instruction, consuming the normal operands plus a carry -/// flag, and similarly producing the result and a carry flag. -HANDLE_TARGET_OPCODE(G_UADDE) - -/// Generic unsigned subtract instruction, consuming the normal operands plus a -/// carry flag, and similarly producing the result and a carry flag. -HANDLE_TARGET_OPCODE(G_USUBE) - -/// Generic signed add instruction, producing the result and a signed overflow -/// flag. -HANDLE_TARGET_OPCODE(G_SADDO) - -/// Generic signed subtract instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_SSUBO) - -/// Generic unsigned multiply instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_UMULO) - -/// Generic signed multiply instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_SMULO) - -// Multiply two numbers at twice the incoming bit width (unsigned) and return -// the high half of the result. -HANDLE_TARGET_OPCODE(G_UMULH) - -// Multiply two numbers at twice the incoming bit width (signed) and return -// the high half of the result. -HANDLE_TARGET_OPCODE(G_SMULH) - -/// Generic FP addition. -HANDLE_TARGET_OPCODE(G_FADD) - -/// Generic FP subtraction. -HANDLE_TARGET_OPCODE(G_FSUB) - -/// Generic FP multiplication. -HANDLE_TARGET_OPCODE(G_FMUL) - -/// Generic FMA multiplication. Behaves like llvm fma intrinsic -HANDLE_TARGET_OPCODE(G_FMA) - -/// Generic FP division. -HANDLE_TARGET_OPCODE(G_FDIV) - -/// Generic FP remainder. -HANDLE_TARGET_OPCODE(G_FREM) - -/// Generic FP exponentiation. -HANDLE_TARGET_OPCODE(G_FPOW) - -/// Generic base-e exponential of a value. -HANDLE_TARGET_OPCODE(G_FEXP) - -/// Generic base-2 exponential of a value. -HANDLE_TARGET_OPCODE(G_FEXP2) - -/// Floating point base-e logarithm of a value. -HANDLE_TARGET_OPCODE(G_FLOG) - -/// Floating point base-2 logarithm of a value. -HANDLE_TARGET_OPCODE(G_FLOG2) - -/// Generic FP negation. -HANDLE_TARGET_OPCODE(G_FNEG) - -/// Generic FP extension. -HANDLE_TARGET_OPCODE(G_FPEXT) - -/// Generic float to signed-int conversion -HANDLE_TARGET_OPCODE(G_FPTRUNC) - -/// Generic float to signed-int conversion -HANDLE_TARGET_OPCODE(G_FPTOSI) - -/// Generic float to unsigned-int conversion -HANDLE_TARGET_OPCODE(G_FPTOUI) - -/// Generic signed-int to float conversion -HANDLE_TARGET_OPCODE(G_SITOFP) - -/// Generic unsigned-int to float conversion -HANDLE_TARGET_OPCODE(G_UITOFP) - -/// Generic pointer offset -HANDLE_TARGET_OPCODE(G_GEP) - -/// Clear the specified number of low bits in a pointer. This rounds the value -/// *down* to the given alignment. -HANDLE_TARGET_OPCODE(G_PTR_MASK) - -/// Generic BRANCH instruction. This is an unconditional branch. 
-HANDLE_TARGET_OPCODE(G_BR) - -/// Generic insertelement. -HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT) - -/// Generic extractelement. -HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) - -/// Generic shufflevector. -HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) - -/// Generic byte swap. -HANDLE_TARGET_OPCODE(G_BSWAP) - -// TODO: Add more generic opcodes as we move along. - -/// Marker for the end of the generic opcode. -/// This is used to check if an opcode is in the range of the -/// generic opcodes. -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BSWAP) - -/// BUILTIN_OP_END - This must be the last enum value in this list. -/// The target-specific post-isel opcode values start here. -HANDLE_TARGET_OPCODE_MARKER(GENERIC_OP_END, PRE_ISEL_GENERIC_OPCODE_END) diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h index 3ca31a970944..d0d959c4ae11 100644 --- a/include/llvm/CodeGen/TargetOpcodes.h +++ b/include/llvm/CodeGen/TargetOpcodes.h @@ -22,7 +22,7 @@ namespace TargetOpcode { enum { #define HANDLE_TARGET_OPCODE(OPC) OPC, #define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) IDENT = OPC, -#include "llvm/CodeGen/TargetOpcodes.def" +#include "llvm/Support/TargetOpcodes.def" }; } // end namespace TargetOpcode diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 1aaa85d77a54..5918c524d11c 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -84,20 +84,6 @@ template <> struct isPodLike<IdentifyingPassPtr> { /// This is an ImmutablePass solely for the purpose of exposing CodeGen options /// to the internals of other CodeGen passes. class TargetPassConfig : public ImmutablePass { -public: - /// Pseudo Pass IDs. These are defined within TargetPassConfig because they - /// are unregistered pass IDs. They are only useful for use with - /// TargetPassConfig APIs to identify multiple occurrences of the same pass. - /// - - /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early - /// during codegen, on SSA form. - static char EarlyTailDuplicateID; - - /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine - /// optimization after regalloc. - static char PostRAMachineLICMID; - private: PassManagerBase *PM = nullptr; AnalysisID StartBefore = nullptr; @@ -218,9 +204,6 @@ public: /// Return true if the optimized regalloc pipeline is enabled. bool getOptimizeRegAlloc() const; - /// Return true if shrink wrapping is enabled. - bool getEnableShrinkWrap() const; - /// Return true if the default global register allocator is in use and /// has not been overridden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; @@ -229,7 +212,7 @@ public: /// representation to the MI representation. /// Adds IR based lowering and target specific optimization passes and finally /// the core instruction selection passes. - /// \returns true if an error occured, false otherwise. + /// \returns true if an error occurred, false otherwise. bool addISelPasses(); /// Add common target configurable passes that perform LLVM IR to IR @@ -320,14 +303,10 @@ public: /// verification is enabled. void addVerifyPass(const std::string &Banner); - /// Check whether or not GlobalISel should be enabled by default. - /// Fallback/abort behavior is controlled via other methods. - virtual bool isGlobalISelEnabled() const; - /// Check whether or not GlobalISel should abort on error.
- /// When this is disable, GlobalISel will fall back on SDISel instead of + /// When this is disabled, GlobalISel will fall back on SDISel instead of /// erroring out. - virtual bool isGlobalISelAbortEnabled() const; + bool isGlobalISelAbortEnabled() const; /// Check whether or not a diagnostic should be emitted when GlobalISel /// uses the fallback path. In other words, it will emit a diagnostic @@ -416,6 +395,13 @@ protected: /// immediately before machine code is emitted. virtual void addPreEmitPass() { } + /// Targets may add passes immediately before machine code is emitted in this + /// callback. This is called even later than `addPreEmitPass`. + // FIXME: Rename `addPreEmitPass` to something more sensible given its actual + // position and remove the `2` suffix here as this callback is what + // `addPreEmitPass` *should* be but in reality isn't. + virtual void addPreEmitPass2() {} + /// Utilities for targets to add passes to the pass manager. /// diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index 81907538fb0b..538a5845466c 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -21,11 +21,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include <cassert> @@ -238,12 +238,12 @@ private: protected: TargetRegisterInfo(const TargetRegisterInfoDesc *ID, - regclass_iterator RegClassBegin, - regclass_iterator RegClassEnd, + regclass_iterator RCB, + regclass_iterator RCE, const char *const *SRINames, const LaneBitmask *SRILaneMasks, LaneBitmask CoveringLanes, - const RegClassInfo *const RSI, + const RegClassInfo *const RCIs, unsigned Mode = 0); virtual ~TargetRegisterInfo(); @@ -444,6 +444,13 @@ public: return false; } + /// Returns the original SrcReg unless it is the target of a copy-like + /// operation, in which case we chain backwards through all such operations + /// to the ultimate source register. If a physical register is encountered, + /// we stop the search. + virtual unsigned lookThruCopyLike(unsigned SrcReg, + const MachineRegisterInfo *MRI) const; + /// Return a null-terminated list of all of the callee-saved registers on /// this target. The register should be in the order of desired callee-save /// stack frame offset. The first register is closest to the incoming stack @@ -752,6 +759,9 @@ public: virtual const RegClassWeight &getRegClassWeight( const TargetRegisterClass *RC) const = 0; + /// Returns size in bits of a phys/virtual/generic register. + unsigned getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const; + /// Get the weight in units of pressure for this register unit. virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; @@ -961,7 +971,7 @@ public: //===--------------------------------------------------------------------===// /// Subtarget Hooks - /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + /// SrcRC and DstRC will be morphed into NewRC if this returns true. virtual bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, @@ -985,6 +995,12 @@ public: /// of the set as well. 
bool checkAllSuperRegsMarked(const BitVector &RegisterSet, ArrayRef<MCPhysReg> Exceptions = ArrayRef<MCPhysReg>()) const; + + virtual const TargetRegisterClass * + getConstrainedRegClassForOperand(const MachineOperand &MO, + const MachineRegisterInfo &MRI) const { + return nullptr; + } }; //===----------------------------------------------------------------------===// @@ -1151,7 +1167,8 @@ struct VirtReg2IndexFunctor { /// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, - unsigned SubRegIdx = 0); + unsigned SubIdx = 0, + const MachineRegisterInfo *MRI = nullptr); /// Create Printable object to print register units on a \ref raw_ostream. /// @@ -1163,11 +1180,11 @@ Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, /// Usage: OS << printRegUnit(Unit, TRI) << '\n'; Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -/// \brief Create Printable object to print virtual registers and physical +/// Create Printable object to print virtual registers and physical /// registers on a \ref raw_ostream. Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -/// \brief Create Printable object to print register classes or register banks +/// Create Printable object to print register classes or register banks /// on a \ref raw_ostream. Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI); diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 1044f0bd27e6..6173925e23a1 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -45,24 +46,23 @@ class TargetSchedModel { public: TargetSchedModel() : SchedModel(MCSchedModel::GetDefaultSchedModel()) {} - /// \brief Initialize the machine model for instruction scheduling. + /// Initialize the machine model for instruction scheduling. /// /// The machine model API keeps a copy of the top-level MCSchedModel table /// indices and may query TargetSubtargetInfo and TargetInstrInfo to resolve /// dynamic properties. - void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, - const TargetInstrInfo *tii); + void init(const TargetSubtargetInfo *TSInfo); /// Return the MCSchedClassDesc for this instruction. const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const; - /// \brief TargetSubtargetInfo getter. + /// TargetSubtargetInfo getter. const TargetSubtargetInfo *getSubtargetInfo() const { return STI; } - /// \brief TargetInstrInfo getter. + /// TargetInstrInfo getter. const TargetInstrInfo *getInstrInfo() const { return TII; } - /// \brief Return true if this machine model includes an instruction-level + /// Return true if this machine model includes an instruction-level /// scheduling model. /// /// This is more detailed than the course grain IssueWidth and default @@ -71,7 +71,7 @@ public: const MCSchedModel *getMCSchedModel() const { return &SchedModel; } - /// \brief Return true if this machine model includes cycle-to-cycle itinerary + /// Return true if this machine model includes cycle-to-cycle itinerary /// data. /// /// This models scheduling at each stage in the processor pipeline. 
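The init() signature above shrinks from three arguments to one; a hedged before/after sketch for a caller, where MF is an assumed MachineFunction and ST its subtarget:

TargetSchedModel SchedModel;
// Before this patch the caller spelled out all three pieces of state:
//   SchedModel.init(ST.getSchedModel(), &ST, ST.getInstrInfo());
// After it, the model pulls the MCSchedModel and TargetInstrInfo from the
// subtarget itself:
SchedModel.init(&MF.getSubtarget());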
@@ -83,35 +83,35 @@ public: return nullptr; } - /// \brief Return true if this machine model includes an instruction-level + /// Return true if this machine model includes an instruction-level /// scheduling model or cycle-to-cycle itinerary data. bool hasInstrSchedModelOrItineraries() const { return hasInstrSchedModel() || hasInstrItineraries(); } - /// \brief Identify the processor corresponding to the current subtarget. + /// Identify the processor corresponding to the current subtarget. unsigned getProcessorID() const { return SchedModel.getProcessorID(); } - /// \brief Maximum number of micro-ops that may be scheduled per cycle. + /// Maximum number of micro-ops that may be scheduled per cycle. unsigned getIssueWidth() const { return SchedModel.IssueWidth; } - /// \brief Return true if new group must begin. + /// Return true if new group must begin. bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Return true if current group must end. + /// Return true if current group must end. bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Return the number of issue slots required for this MI. + /// Return the number of issue slots required for this MI. unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Get the number of kinds of resources for this target. + /// Get the number of kinds of resources for this target. unsigned getNumProcResourceKinds() const { return SchedModel.getNumProcResourceKinds(); } - /// \brief Get a processor resource by ID for convenience. + /// Get a processor resource by ID for convenience. const MCProcResourceDesc *getProcResource(unsigned PIdx) const { return SchedModel.getProcResource(PIdx); } @@ -126,7 +126,7 @@ public: using ProcResIter = const MCWriteProcResEntry *; - // \brief Get an iterator into the processor resources consumed by this + // Get an iterator into the processor resources consumed by this // scheduling class. ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const { // The subtarget holds a single resource table for all processors. @@ -136,34 +136,34 @@ public: return STI->getWriteProcResEnd(SC); } - /// \brief Multiply the number of units consumed for a resource by this factor + /// Multiply the number of units consumed for a resource by this factor /// to normalize it relative to other resources. unsigned getResourceFactor(unsigned ResIdx) const { return ResourceFactors[ResIdx]; } - /// \brief Multiply number of micro-ops by this factor to normalize it + /// Multiply number of micro-ops by this factor to normalize it /// relative to other resources. unsigned getMicroOpFactor() const { return MicroOpFactor; } - /// \brief Multiply cycle count by this factor to normalize it relative to + /// Multiply cycle count by this factor to normalize it relative to /// other resources. This is the number of resource units per cycle. unsigned getLatencyFactor() const { return ResourceLCM; } - /// \brief Number of micro-ops that may be buffered for OOO execution. + /// Number of micro-ops that may be buffered for OOO execution. unsigned getMicroOpBufferSize() const { return SchedModel.MicroOpBufferSize; } - /// \brief Number of resource units that may be buffered for OOO execution. + /// Number of resource units that may be buffered for OOO execution. /// \return The buffer size in resource units or -1 for unlimited. 
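A made-up worked example of how the three normalization factors above relate (the real computation happens in TargetSchedModel::init; all numbers here are illustrative): with IssueWidth = 2 micro-ops/cycle, resource A at 2 units/cycle and resource B at 3 units/cycle, the LCM of the unit counts is 6, so:

unsigned ResourceLCM   = 6;               // getLatencyFactor(): units per cycle
unsigned FactorA       = ResourceLCM / 2; // getResourceFactor(A) == 3
unsigned FactorB       = ResourceLCM / 3; // getResourceFactor(B) == 2
unsigned MicroOpFactor = ResourceLCM / 2; // getMicroOpFactor()   == 3
// A full cycle of any resource (or of issue bandwidth) now costs the same
// 6 normalized units, so micro-op and resource pressure become comparable.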
int getResourceBufferSize(unsigned PIdx) const { return SchedModel.getProcResource(PIdx)->BufferSize; } - /// \brief Compute operand latency based on the available machine model. + /// Compute operand latency based on the available machine model. /// /// Compute and return the latency of the given data dependent def and use /// when the operand indices are already known. UseMI may be NULL for an @@ -172,7 +172,7 @@ public: const MachineInstr *UseMI, unsigned UseOperIdx) const; - /// \brief Compute the instruction latency based on the available machine + /// Compute the instruction latency based on the available machine /// model. /// /// Compute and return the expected latency of this instruction independent of @@ -185,18 +185,20 @@ public: /// if converter after moving it to TargetSchedModel). unsigned computeInstrLatency(const MachineInstr *MI, bool UseDefaultDefLatency = true) const; + unsigned computeInstrLatency(const MCInst &Inst) const; unsigned computeInstrLatency(unsigned Opcode) const; - /// \brief Output dependency latency of a pair of defs of the same register. + /// Output dependency latency of a pair of defs of the same register. /// /// This is typically one cycle. - unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, + unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const; - /// \brief Compute the reciprocal throughput of the given instruction. - Optional<double> computeInstrRThroughput(const MachineInstr *MI) const; - Optional<double> computeInstrRThroughput(unsigned Opcode) const; + /// Compute the reciprocal throughput of the given instruction. + double computeReciprocalThroughput(const MachineInstr *MI) const; + double computeReciprocalThroughput(const MCInst &MI) const; + double computeReciprocalThroughput(unsigned Opcode) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 576522aef466..227e591f5a7d 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -144,7 +144,7 @@ public: return 0; } - /// \brief True if the subtarget should run MachineScheduler after aggressive + /// True if the subtarget should run MachineScheduler after aggressive /// coalescing. /// /// This currently replaces the SelectionDAG scheduler with the "source" order @@ -152,14 +152,14 @@ public: /// TargetLowering preference). It does not yet disable the postRA scheduler. virtual bool enableMachineScheduler() const; - /// \brief Support printing of [latency:throughput] comment in output .S file. + /// Support printing of [latency:throughput] comment in output .S file. virtual bool supportPrintSchedInfo() const { return false; } - /// \brief True if the machine scheduler should disable the TLI preference + /// True if the machine scheduler should disable the TLI preference /// for preRA scheduling with the source level scheduler. virtual bool enableMachineSchedDefaultSched() const { return true; } - /// \brief True if the subtarget should enable joining global copies. + /// True if the subtarget should enable joining global copies. /// /// By default this is enabled if the machine scheduler is enabled, but /// can be overridden. @@ -171,10 +171,13 @@ public: /// which is the preferred way to influence this. virtual bool enablePostRAScheduler() const; - /// \brief True if the subtarget should run the atomic expansion pass. 
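Because the Optional<double> of computeInstrRThroughput becomes a plain double in the renamed computeReciprocalThroughput, callers lose the presence check; a sketch assuming a raw_ostream OS, a TargetSchedModel SchedModel, and a MachineInstr *MI in scope:

// Before: if (Optional<double> RT = SchedModel.computeInstrRThroughput(MI))
//           OS << " [RThroughput: " << format("%.2f", *RT) << ']';
// After: a value is always produced, so the check disappears.
double RThroughput = SchedModel.computeReciprocalThroughput(MI);
OS << " [RThroughput: " << format("%.2f", RThroughput) << ']';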
+ /// True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; - /// \brief Override generic scheduling policy within a region. + /// True if the subtarget should run the indirectbr expansion pass. + virtual bool enableIndirectBrExpand() const; + + /// Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom /// scheduling heuristics (no custom MachineSchedStrategy) to make @@ -182,7 +185,7 @@ public: virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const {} - // \brief Perform target specific adjustments to the latency of a schedule + // Perform target specific adjustments to the latency of a schedule // dependency. virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {} @@ -197,13 +200,13 @@ public: return CriticalPathRCs.clear(); } - // \brief Provide an ordered list of schedule DAG mutations for the post-RA + // Provide an ordered list of schedule DAG mutations for the post-RA // scheduler. virtual void getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { } - // \brief Provide an ordered list of schedule DAG mutations for the machine + // Provide an ordered list of schedule DAG mutations for the machine // pipeliner. virtual void getSMSMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { @@ -215,25 +218,25 @@ public: return CodeGenOpt::Default; } - /// \brief True if the subtarget should run the local reassignment + /// True if the subtarget should run the local reassignment /// heuristic of the register allocator. /// This heuristic may be compile time intensive, \p OptLevel provides /// a finer grain to tune the register allocator. virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; - /// \brief True if the subtarget should consider the cost of local intervals + /// True if the subtarget should consider the cost of local intervals /// created by a split candidate when choosing the best split candidate. This /// heuristic may be compile time intensive. virtual bool enableAdvancedRASplitCost() const; - /// \brief Enable use of alias analysis during code generation (during MI + /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). virtual bool useAA() const; - /// \brief Enable the use of the early if conversion pass. + /// Enable the use of the early if conversion pass. virtual bool enableEarlyIfConversion() const { return false; } - /// \brief Return PBQPConstraint(s) for the target. + /// Return PBQPConstraint(s) for the target. /// /// Override to provide custom PBQP constraints. virtual std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const { @@ -246,8 +249,11 @@ public: virtual bool enableSubRegLiveness() const { return false; } /// Returns string representation of scheduler comment - std::string getSchedInfoStr(const MachineInstr &MI) const override; + std::string getSchedInfoStr(const MachineInstr &MI) const; std::string getSchedInfoStr(MCInst const &MCI) const override; + + /// This is called after a .mir file was loaded. 
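A minimal sketch of a hypothetical target opting in to the new indirectbr expansion hook; the constructor plumbing a real subtarget needs is omitted:

class MyTargetSubtarget : public TargetSubtargetInfo {
public:
  // Ask TargetPassConfig to schedule the IndirectBrExpand pass, e.g. so
  // that every indirectbr target becomes enumerable.
  bool enableIndirectBrExpand() const override { return true; }
};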
+  virtual void mirFileLoaded(MachineFunction &MF) const;
 };

 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 40d501edde10..d2ef4a94f8e2 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_CODEGEN_VALUETYPES_H
 #define LLVM_CODEGEN_VALUETYPES_H

-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 #include <cstdint>
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 73c7fb4ce4b3..0abb4ece1d14 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
 //
 // Value types - These values correspond to the register types defined in the
-// ValueTypes.h file. If you update anything here, you must update it there as
-// well!
+// MachineValueType.h file. If you update anything here, you must update it
+// there as well!
 //
//===----------------------------------------------------------------------===//

@@ -69,7 +69,7 @@ def v4i32 : ValueType<128, 43>; // 4 x i32 vector value
 def v8i32 : ValueType<256, 44>; // 8 x i32 vector value
 def v16i32 : ValueType<512, 45>; // 16 x i32 vector value
 def v32i32 : ValueType<1024,46>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,47>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,47>; // 64 x i32 vector value

 def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value
 def v2i64 : ValueType<128, 49>; // 2 x i64 vector value
@@ -145,6 +145,7 @@ def x86mmx : ValueType<64 , 109>; // X86 MMX value
 def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue
 def isVoid : ValueType<0 , 111>; // Produces no value
 def untyped: ValueType<8 , 112>; // Produces an untyped value
+def ExceptRef: ValueType<0, 113>; // WebAssembly's except_ref type

 def token : ValueType<0 , 248>; // TokenTy
 def MetadataVT: ValueType<0, 249>; // Metadata
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 3b06f0393114..6a8e50a7e5f5 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -90,24 +90,24 @@ class TargetInstrInfo;

     void grow();

-    /// @brief returns true if the specified virtual register is
+    /// returns true if the specified virtual register is
     /// mapped to a physical register
     bool hasPhys(unsigned virtReg) const {
       return getPhys(virtReg) != NO_PHYS_REG;
     }

-    /// @brief returns the physical register mapped to the specified
+    /// returns the physical register mapped to the specified
     /// virtual register
     unsigned getPhys(unsigned virtReg) const {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
       return Virt2PhysMap[virtReg];
     }

-    /// @brief creates a mapping for the specified virtual register to
+    /// creates a mapping for the specified virtual register to
     /// the specified physical register
     void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg);

-    /// @brief clears the specified virtual register's, physical
+    /// clears the specified virtual register's physical
     /// register mapping
     void clearVirt(unsigned virtReg) {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
@@ -116,26 +116,26 @@ class TargetInstrInfo;
       Virt2PhysMap[virtReg] = NO_PHYS_REG;
     }
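A hedged usage sketch of the virtual-to-physical queries above; VRM, VirtReg, and PhysReg are assumed names in a register-allocator-like caller:

if (!VRM.hasPhys(VirtReg))
  VRM.assignVirt2Phys(VirtReg, PhysReg);  // establish the mapping
unsigned Assigned = VRM.getPhys(VirtReg); // now guaranteed != NO_PHYS_REG
VRM.clearVirt(VirtReg);                   // and tear it down again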
-    /// @brief clears all virtual to physical register mappings
+    /// clears all virtual to physical register mappings
     void clearAllVirt() {
       Virt2PhysMap.clear();
       grow();
     }

-    /// @brief returns true if VirtReg is assigned to its preferred physreg.
+    /// returns true if VirtReg is assigned to its preferred physreg.
     bool hasPreferredPhys(unsigned VirtReg);

-    /// @brief returns true if VirtReg has a known preferred register.
+    /// returns true if VirtReg has a known preferred register.
     /// This returns false if VirtReg has a preference that is a virtual
     /// register that hasn't been assigned yet.
     bool hasKnownPreference(unsigned VirtReg);

-    /// @brief records virtReg is a split live interval from SReg.
+    /// records virtReg is a split live interval from SReg.
     void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
       Virt2SplitMap[virtReg] = SReg;
     }

-    /// @brief returns the live interval virtReg is split from.
+    /// returns the live interval virtReg is split from.
     unsigned getPreSplitReg(unsigned virtReg) const {
       return Virt2SplitMap[virtReg];
     }
@@ -149,7 +149,7 @@ class TargetInstrInfo;
       return Orig ? Orig : VirtReg;
     }

-    /// @brief returns true if the specified virtual register is not
+    /// returns true if the specified virtual register is not
     /// mapped to a stack slot or rematerialized.
     bool isAssignedReg(unsigned virtReg) const {
       if (getStackSlot(virtReg) == NO_STACK_SLOT)
@@ -159,20 +159,20 @@ class TargetInstrInfo;
       return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
     }

-    /// @brief returns the stack slot mapped to the specified virtual
+    /// returns the stack slot mapped to the specified virtual
     /// register
     int getStackSlot(unsigned virtReg) const {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
       return Virt2StackSlotMap[virtReg];
     }

-    /// @brief create a mapping for the specifed virtual register to
+    /// create a mapping for the specified virtual register to
     /// the next available stack slot
     int assignVirt2StackSlot(unsigned virtReg);

-    /// @brief create a mapping for the specified virtual register to
+    /// create a mapping for the specified virtual register to
     /// the specified stack slot
-    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+    void assignVirt2StackSlot(unsigned virtReg, int SS);

     void print(raw_ostream &OS, const Module* M = nullptr) const override;
     void dump() const;
diff --git a/include/llvm/CodeGen/WasmEHFuncInfo.h b/include/llvm/CodeGen/WasmEHFuncInfo.h
new file mode 100644
index 000000000000..3ad6760d8813
--- /dev/null
+++ b/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -0,0 +1,80 @@
+//===--- llvm/CodeGen/WasmEHFuncInfo.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for Wasm exception handling schemes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_WASMEHFUNCINFO_H
+#define LLVM_CODEGEN_WASMEHFUNCINFO_H
+
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/BasicBlock.h"
+
+namespace llvm {
+
+using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
+
+struct WasmEHFuncInfo {
+  // When there is an entry <A, B>, if an exception is not caught by A, it
+  // should next unwind to the EH pad B.
+  DenseMap<BBOrMBB, BBOrMBB> EHPadUnwindMap;
+  // For entry <A, B>, A is a BB with an instruction that may throw
+  // (invoke/cleanupret in LLVM IR, call/rethrow in the backend) and B is an EH
+  // pad that A unwinds to.
+  DenseMap<BBOrMBB, BBOrMBB> ThrowUnwindMap;
+
+  // Helper functions
+  const BasicBlock *getEHPadUnwindDest(const BasicBlock *BB) const {
+    return EHPadUnwindMap.lookup(BB).get<const BasicBlock *>();
+  }
+  void setEHPadUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
+    EHPadUnwindMap[BB] = Dest;
+  }
+  const BasicBlock *getThrowUnwindDest(BasicBlock *BB) const {
+    return ThrowUnwindMap.lookup(BB).get<const BasicBlock *>();
+  }
+  void setThrowUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
+    ThrowUnwindMap[BB] = Dest;
+  }
+  bool hasEHPadUnwindDest(const BasicBlock *BB) const {
+    return EHPadUnwindMap.count(BB);
+  }
+  bool hasThrowUnwindDest(const BasicBlock *BB) const {
+    return ThrowUnwindMap.count(BB);
+  }
+
+  MachineBasicBlock *getEHPadUnwindDest(MachineBasicBlock *MBB) const {
+    return EHPadUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
+  }
+  void setEHPadUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
+    EHPadUnwindMap[MBB] = Dest;
+  }
+  MachineBasicBlock *getThrowUnwindDest(MachineBasicBlock *MBB) const {
+    return ThrowUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
+  }
+  void setThrowUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
+    ThrowUnwindMap[MBB] = Dest;
+  }
+  bool hasEHPadUnwindDest(MachineBasicBlock *MBB) const {
+    return EHPadUnwindMap.count(MBB);
+  }
+  bool hasThrowUnwindDest(MachineBasicBlock *MBB) const {
+    return ThrowUnwindMap.count(MBB);
+  }
+};
+
+// Analyze the IR in the given function to build WasmEHFuncInfo.
+void calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo);
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_WASMEHFUNCINFO_H
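Finally, a hedged sketch of driving the IR-side half of WasmEHFuncInfo; F is an assumed Function and EHPad one of its EH-pad basic blocks:

WasmEHFuncInfo EHInfo;
calculateWasmEHInfo(&F, EHInfo); // populate both unwind maps from the IR
if (EHInfo.hasEHPadUnwindDest(EHPad)) {
  // If an exception is not caught by EHPad, it unwinds to Dest next.
  const BasicBlock *Dest = EHInfo.getEHPadUnwindDest(EHPad);
  (void)Dest;
}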