Diffstat (limited to 'utils')
137 files changed, 10585 insertions, 2748 deletions
diff --git a/utils/FileCheck/CMakeLists.txt b/utils/FileCheck/CMakeLists.txt index 999320f78af2..32e948a1a19e 100644 --- a/utils/FileCheck/CMakeLists.txt +++ b/utils/FileCheck/CMakeLists.txt @@ -2,4 +2,4 @@ add_llvm_utility(FileCheck FileCheck.cpp ) -target_link_libraries(FileCheck LLVMSupport) +target_link_libraries(FileCheck PRIVATE LLVMSupport) diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 9d808cc875c0..7db97301637d 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -62,6 +62,10 @@ static cl::list<std::string> ImplicitCheckNot( "this pattern occur which are not matched by a positive pattern"), cl::value_desc("pattern")); +static cl::list<std::string> GlobalDefines("D", cl::Prefix, + cl::desc("Define a variable to be used in capture patterns."), + cl::value_desc("VAR=VALUE")); + static cl::opt<bool> AllowEmptyInput( "allow-empty", cl::init(false), cl::desc("Allow the input file to be empty. This is useful when making\n" @@ -1295,6 +1299,9 @@ bool CheckInput(SourceMgr &SM, StringRef Buffer, /// VariableTable - This holds all the current filecheck variables. StringMap<StringRef> VariableTable; + for (const auto& Def : GlobalDefines) + VariableTable.insert(StringRef(Def).split('=')); + unsigned i = 0, j = 0, e = CheckStrings.size(); while (true) { StringRef CheckRegion; diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 1f8e1b125889..f2d304bfcf5b 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -205,6 +205,9 @@ struct ClassInfo { /// For custom match classes: the diagnostic kind for when the predicate fails. std::string DiagnosticType; + /// For custom match classes: the diagnostic string for when the predicate fails. + std::string DiagnosticString; + /// Is this operand optional and not always required. bool IsOptional; @@ -701,13 +704,13 @@ public: /// Map of AsmOperandClass records to their class information. std::map<Record*, ClassInfo*> AsmOperandClasses; + /// Map of RegisterClass records to their class information. + std::map<Record*, ClassInfo*> RegisterClassClasses; + private: /// Map of token to class information which has already been constructed. std::map<std::string, ClassInfo*> TokenClasses; - /// Map of RegisterClass records to their class information. - std::map<Record*, ClassInfo*> RegisterClassClasses; - private: /// getTokenClass - Lookup or create the class for the given token. ClassInfo *getTokenClass(StringRef Token); @@ -1279,6 +1282,19 @@ buildRegisterClasses(SmallPtrSetImpl<Record*> &SingletonRegisters) { } else CI->ValueName = CI->ValueName + "," + RC.getName(); + Init *DiagnosticType = Def->getValueInit("DiagnosticType"); + if (StringInit *SI = dyn_cast<StringInit>(DiagnosticType)) + CI->DiagnosticType = SI->getValue(); + + Init *DiagnosticString = Def->getValueInit("DiagnosticString"); + if (StringInit *SI = dyn_cast<StringInit>(DiagnosticString)) + CI->DiagnosticString = SI->getValue(); + + // If we have a diagnostic string but the diagnostic type is not specified + // explicitly, create an anonymous diagnostic type. + if (!CI->DiagnosticString.empty() && CI->DiagnosticType.empty()) + CI->DiagnosticType = RC.getName(); + RegisterClassClasses.insert(std::make_pair(Def, CI)); } @@ -1357,11 +1373,17 @@ void AsmMatcherInfo::buildOperandClasses() { if (StringInit *SI = dyn_cast<StringInit>(PRMName)) CI->ParserMethod = SI->getValue(); - // Get the diagnostic type or leave it as empty. 
- // Get the parse method name or leave it as empty. + // Get the diagnostic type and string or leave them as empty. Init *DiagnosticType = Rec->getValueInit("DiagnosticType"); if (StringInit *SI = dyn_cast<StringInit>(DiagnosticType)) CI->DiagnosticType = SI->getValue(); + Init *DiagnosticString = Rec->getValueInit("DiagnosticString"); + if (StringInit *SI = dyn_cast<StringInit>(DiagnosticString)) + CI->DiagnosticString = SI->getValue(); + // If we have a DiagnosticString, we need a DiagnosticType for use within + // the matcher. + if (!CI->DiagnosticString.empty() && CI->DiagnosticType.empty()) + CI->DiagnosticType = CI->ClassName; Init *IsOptional = Rec->getValueInit("IsOptional"); if (BitInit *BI = dyn_cast<BitInit>(IsOptional)) @@ -1847,13 +1869,25 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, CvtOS << " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n"; CvtOS << " const uint8_t *Converter = ConversionTable[Kind];\n"; if (HasOptionalOperands) { - CvtOS << " unsigned NumDefaults = 0;\n"; + size_t MaxNumOperands = 0; + for (const auto &MI : Infos) { + MaxNumOperands = std::max(MaxNumOperands, MI->AsmOperands.size()); + } + CvtOS << " unsigned DefaultsOffset[" << (MaxNumOperands + 1) + << "] = { 0 };\n"; + CvtOS << " assert(OptionalOperandsMask.size() == " << (MaxNumOperands) + << ");\n"; + CvtOS << " for (unsigned i = 0, NumDefaults = 0; i < " << (MaxNumOperands) + << "; ++i) {\n"; + CvtOS << " DefaultsOffset[i + 1] = NumDefaults;\n"; + CvtOS << " NumDefaults += (OptionalOperandsMask[i] ? 1 : 0);\n"; + CvtOS << " }\n"; } CvtOS << " unsigned OpIdx;\n"; CvtOS << " Inst.setOpcode(Opcode);\n"; CvtOS << " for (const uint8_t *p = Converter; *p; p+= 2) {\n"; if (HasOptionalOperands) { - CvtOS << " OpIdx = *(p + 1) - NumDefaults;\n"; + CvtOS << " OpIdx = *(p + 1) - DefaultsOffset[*(p + 1)];\n"; } else { CvtOS << " OpIdx = *(p + 1);\n"; } @@ -1988,7 +2022,6 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, << " " << Op.Class->DefaultMethod << "()" << "->" << Op.Class->RenderMethod << "(Inst, " << OpInfo.MINumOperands << ");\n" - << " ++NumDefaults;\n" << " } else {\n" << " static_cast<" << TargetOperandClass << "&>(*Operands[OpIdx])." << Op.Class->RenderMethod @@ -2158,7 +2191,18 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target, OS << "enum MatchClassKind {\n"; OS << " InvalidMatchClass = 0,\n"; OS << " OptionalMatchClass = 1,\n"; + ClassInfo::ClassInfoKind LastKind = ClassInfo::Token; + StringRef LastName = "OptionalMatchClass"; for (const auto &CI : Infos) { + if (LastKind == ClassInfo::Token && CI.Kind != ClassInfo::Token) { + OS << " MCK_LAST_TOKEN = " << LastName << ",\n"; + } else if (LastKind < ClassInfo::UserClass0 && + CI.Kind >= ClassInfo::UserClass0) { + OS << " MCK_LAST_REGISTER = " << LastName << ",\n"; + } + LastKind = (ClassInfo::ClassInfoKind)CI.Kind; + LastName = CI.Name; + OS << " " << CI.Name << ", // "; if (CI.Kind == ClassInfo::Token) { OS << "'" << CI.ValueName << "'\n"; @@ -2177,6 +2221,64 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target, OS << "}\n\n"; } +/// emitMatchClassDiagStrings - Emit a function to get the diagnostic text to be +/// used when an assembly operand does not match the expected operand class. +static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info, raw_ostream &OS) { + // If the target does not use DiagnosticString for any operands, don't emit + // an unused function. 
+ if (std::all_of( + Info.Classes.begin(), Info.Classes.end(), + [](const ClassInfo &CI) { return CI.DiagnosticString.empty(); })) + return; + + OS << "static const char *getMatchKindDiag(" << Info.Target.getName() + << "AsmParser::" << Info.Target.getName() + << "MatchResultTy MatchResult) {\n"; + OS << " switch (MatchResult) {\n"; + + for (const auto &CI: Info.Classes) { + if (!CI.DiagnosticString.empty()) { + assert(!CI.DiagnosticType.empty() && + "DiagnosticString set without DiagnosticType"); + OS << " case " << Info.Target.getName() + << "AsmParser::Match_" << CI.DiagnosticType << ":\n"; + OS << " return \"" << CI.DiagnosticString << "\";\n"; + } + } + + OS << " default:\n"; + OS << " return nullptr;\n"; + + OS << " }\n"; + OS << "}\n\n"; +} + +static void emitRegisterMatchErrorFunc(AsmMatcherInfo &Info, raw_ostream &OS) { + OS << "static unsigned getDiagKindFromRegisterClass(MatchClassKind " + "RegisterClass) {\n"; + if (std::none_of(Info.Classes.begin(), Info.Classes.end(), + [](const ClassInfo &CI) { + return CI.isRegisterClass() && !CI.DiagnosticType.empty(); + })) { + OS << " return MCTargetAsmParser::Match_InvalidOperand;\n"; + } else { + OS << " switch (RegisterClass) {\n"; + for (const auto &CI: Info.Classes) { + if (CI.isRegisterClass() && !CI.DiagnosticType.empty()) { + OS << " case " << CI.Name << ":\n"; + OS << " return " << Info.Target.getName() << "AsmParser::Match_" + << CI.DiagnosticType << ";\n"; + } + } + + OS << " default:\n"; + OS << " return MCTargetAsmParser::Match_InvalidOperand;\n"; + + OS << " }\n"; + } + OS << "}\n\n"; +} + /// emitValidateOperandClass - Emit the function to validate an operand class. static void emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) { @@ -2191,7 +2293,7 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info, // Check for Token operands first. // FIXME: Use a more specific diagnostic type. - OS << " if (Operand.isToken())\n"; + OS << " if (Operand.isToken() && Kind <= MCK_LAST_TOKEN)\n"; OS << " return isSubclass(matchTokenString(Operand.getToken()), Kind) ?\n" << " MCTargetAsmParser::Match_Success :\n" << " MCTargetAsmParser::Match_InvalidOperand;\n\n"; @@ -2227,8 +2329,12 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info, << "; break;\n"; OS << " }\n"; OS << " return isSubclass(OpKind, Kind) ? " - << "MCTargetAsmParser::Match_Success :\n " - << " MCTargetAsmParser::Match_InvalidOperand;\n }\n\n"; + << "(unsigned)MCTargetAsmParser::Match_Success :\n " + << " getDiagKindFromRegisterClass(Kind);\n }\n\n"; + + // Expected operand is a register, but actual is not. + OS << " if (Kind > MCK_LAST_TOKEN && Kind <= MCK_LAST_REGISTER)\n"; + OS << " return getDiagKindFromRegisterClass(Kind);\n\n"; // Generic fallthrough match failure case for operands that don't have // specialized diagnostic types. 
@@ -2332,7 +2438,9 @@ static void emitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, OS << "static unsigned MatchRegisterName(StringRef Name) {\n"; - StringMatcher("Name", Matches, OS).Emit(); + bool IgnoreDuplicates = + AsmParser->getValueAsBit("AllowDuplicateRegisterNames"); + StringMatcher("Name", Matches, OS).Emit(0, IgnoreDuplicates); OS << " return 0;\n"; OS << "}\n\n"; @@ -2363,7 +2471,9 @@ static void emitMatchRegisterAltName(CodeGenTarget &Target, Record *AsmParser, OS << "static unsigned MatchRegisterAltName(StringRef Name) {\n"; - StringMatcher("Name", Matches, OS).Emit(); + bool IgnoreDuplicates = + AsmParser->getValueAsBit("AllowDuplicateRegisterNames"); + StringMatcher("Name", Matches, OS).Emit(0, IgnoreDuplicates); OS << " return 0;\n"; OS << "}\n\n"; @@ -2377,6 +2487,10 @@ static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) { if (!OpClassEntry.second->DiagnosticType.empty()) Types.insert(OpClassEntry.second->DiagnosticType); } + for (const auto &OpClassEntry : Info.RegisterClassClasses) { + if (!OpClassEntry.second->DiagnosticType.empty()) + Types.insert(OpClassEntry.second->DiagnosticType); + } if (Types.empty()) return; @@ -2650,7 +2764,8 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, // a better error handling. OS << "OperandMatchResultTy " << Target.getName() << ClassName << "::\n" << "MatchOperandParserImpl(OperandVector" - << " &Operands,\n StringRef Mnemonic) {\n"; + << " &Operands,\n StringRef Mnemonic,\n" + << " bool ParseForAllFeatures) {\n"; // Emit code to get the available features. OS << " // Get the current feature set.\n"; @@ -2688,10 +2803,9 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, // Emit check that the required features are available. OS << " // check if the available features match\n"; - OS << " if ((AvailableFeatures & it->RequiredFeatures) " - << "!= it->RequiredFeatures) {\n"; - OS << " continue;\n"; - OS << " }\n\n"; + OS << " if (!ParseForAllFeatures && (AvailableFeatures & " + "it->RequiredFeatures) != it->RequiredFeatures)\n"; + OS << " continue;\n\n"; // Emit check to ensure the operand number matches. 
OS << " // check if the operand in question has a custom parser.\n"; @@ -2713,16 +2827,27 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target, unsigned VariantCount) { - OS << "std::string " << Target.getName() << "MnemonicSpellCheck(StringRef S, uint64_t FBS) {\n"; + OS << "static std::string " << Target.getName() + << "MnemonicSpellCheck(StringRef S, uint64_t FBS, unsigned VariantID) {\n"; if (!VariantCount) OS << " return \"\";"; else { OS << " const unsigned MaxEditDist = 2;\n"; OS << " std::vector<StringRef> Candidates;\n"; - OS << " StringRef Prev = \"\";\n"; - OS << " auto End = std::end(MatchTable0);\n"; - OS << "\n"; - OS << " for (auto I = std::begin(MatchTable0); I < End; I++) {\n"; + OS << " StringRef Prev = \"\";\n\n"; + + OS << " // Find the appropriate table for this asm variant.\n"; + OS << " const MatchEntry *Start, *End;\n"; + OS << " switch (VariantID) {\n"; + OS << " default: llvm_unreachable(\"invalid variant!\");\n"; + for (unsigned VC = 0; VC != VariantCount; ++VC) { + Record *AsmVariant = Target.getAsmParserVariant(VC); + int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); + OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC + << "); End = std::end(MatchTable" << VC << "); break;\n"; + } + OS << " }\n\n"; + OS << " for (auto I = Start; I < End; I++) {\n"; OS << " // Ignore unsupported instructions.\n"; OS << " if ((FBS & I->RequiredFeatures) != I->RequiredFeatures)\n"; OS << " continue;\n"; @@ -2752,6 +2877,26 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target, } +// Emit a function mapping match classes to strings, for debugging. +static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos, + raw_ostream &OS) { + OS << "#ifndef NDEBUG\n"; + OS << "const char *getMatchClassName(MatchClassKind Kind) {\n"; + OS << " switch (Kind) {\n"; + + OS << " case InvalidMatchClass: return \"InvalidMatchClass\";\n"; + OS << " case OptionalMatchClass: return \"OptionalMatchClass\";\n"; + for (const auto &CI : Infos) { + OS << " case " << CI.Name << ": return \"" << CI.Name << "\";\n"; + } + OS << " case NumMatchClassKinds: return \"NumMatchClassKinds\";\n"; + + OS << " }\n"; + OS << " llvm_unreachable(\"unhandled MatchClassKind!\");\n"; + OS << "}\n\n"; + OS << "#endif // NDEBUG\n"; +} + void AsmMatcherEmitter::run(raw_ostream &OS) { CodeGenTarget Target(Records); Record *AsmParser = Target.getAsmParser(); @@ -2813,6 +2958,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst"); bool HasOptionalOperands = Info.hasOptionalOperands(); + bool ReportMultipleNearMisses = + AsmParser->getValueAsBit("ReportMultipleNearMisses"); // Write the output. 
@@ -2835,15 +2982,19 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " void convertToMapAndConstraints(unsigned Kind,\n "; OS << " const OperandVector &Operands) override;\n"; OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n" - << " MCInst &Inst,\n" - << " uint64_t &ErrorInfo," - << " bool matchingInlineAsm,\n" + << " MCInst &Inst,\n"; + if (ReportMultipleNearMisses) + OS << " SmallVectorImpl<NearMissInfo> *NearMisses,\n"; + else + OS << " uint64_t &ErrorInfo,\n"; + OS << " bool matchingInlineAsm,\n" << " unsigned VariantID = 0);\n"; if (!Info.OperandMatchInfo.empty()) { OS << " OperandMatchResultTy MatchOperandParserImpl(\n"; OS << " OperandVector &Operands,\n"; - OS << " StringRef Mnemonic);\n"; + OS << " StringRef Mnemonic,\n"; + OS << " bool ParseForAllFeatures = false);\n"; OS << " OperandMatchResultTy tryCustomParseOperand(\n"; OS << " OperandVector &Operands,\n"; @@ -2898,6 +3049,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { // Emit the enumeration for classes which participate in matching. emitMatchClassEnumeration(Target, Info.Classes, OS); + // Emit a function to get the user-visible string to describe an operand + // match failure in diagnostics. + emitOperandMatchErrorDiagStrings(Info, OS); + + // Emit a function to map register classes to operand match failure codes. + emitRegisterMatchErrorFunc(Info, OS); + // Emit the routine to match token strings to their match class. emitMatchTokenString(Target, Info.Classes, OS); @@ -2907,6 +3065,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { // Emit the routine to validate an operand against a match class. emitValidateOperandClass(Info, OS); + emitMatchClassKindNames(Info.Classes, OS); + // Emit the available features compute function. SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( Info.Target.getName(), ClassName, "ComputeAvailableFeatures", @@ -3015,21 +3175,28 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << "};\n\n"; } - emitMnemonicSpellChecker(OS, Target, VariantCount); + OS << "#include \"llvm/Support/Debug.h\"\n"; + OS << "#include \"llvm/Support/Format.h\"\n\n"; // Finally, build the match function. OS << "unsigned " << Target.getName() << ClassName << "::\n" << "MatchInstructionImpl(const OperandVector &Operands,\n"; - OS << " MCInst &Inst, uint64_t &ErrorInfo,\n" - << " bool matchingInlineAsm, unsigned VariantID) {\n"; - - OS << " // Eliminate obvious mismatches.\n"; - OS << " if (Operands.size() > " - << (MaxNumOperands + HasMnemonicFirst) << ") {\n"; - OS << " ErrorInfo = " - << (MaxNumOperands + HasMnemonicFirst) << ";\n"; - OS << " return Match_InvalidOperand;\n"; - OS << " }\n\n"; + OS << " MCInst &Inst,\n"; + if (ReportMultipleNearMisses) + OS << " SmallVectorImpl<NearMissInfo> *NearMisses,\n"; + else + OS << " uint64_t &ErrorInfo,\n"; + OS << " bool matchingInlineAsm, unsigned VariantID) {\n"; + + if (!ReportMultipleNearMisses) { + OS << " // Eliminate obvious mismatches.\n"; + OS << " if (Operands.size() > " + << (MaxNumOperands + HasMnemonicFirst) << ") {\n"; + OS << " ErrorInfo = " + << (MaxNumOperands + HasMnemonicFirst) << ";\n"; + OS << " return Match_InvalidOperand;\n"; + OS << " }\n\n"; + } // Emit code to get the available features. OS << " // Get the current feature set.\n"; @@ -3052,17 +3219,20 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } // Emit code to compute the class list for this operand vector. 
- OS << " // Some state to try to produce better error messages.\n"; - OS << " bool HadMatchOtherThanFeatures = false;\n"; - OS << " bool HadMatchOtherThanPredicate = false;\n"; - OS << " unsigned RetCode = Match_InvalidOperand;\n"; - OS << " uint64_t MissingFeatures = ~0ULL;\n"; + if (!ReportMultipleNearMisses) { + OS << " // Some state to try to produce better error messages.\n"; + OS << " bool HadMatchOtherThanFeatures = false;\n"; + OS << " bool HadMatchOtherThanPredicate = false;\n"; + OS << " unsigned RetCode = Match_InvalidOperand;\n"; + OS << " uint64_t MissingFeatures = ~0ULL;\n"; + OS << " // Set ErrorInfo to the operand that mismatches if it is\n"; + OS << " // wrong for all instances of the instruction.\n"; + OS << " ErrorInfo = ~0ULL;\n"; + } + if (HasOptionalOperands) { OS << " SmallBitVector OptionalOperandsMask(" << MaxNumOperands << ");\n"; } - OS << " // Set ErrorInfo to the operand that mismatches if it is\n"; - OS << " // wrong for all instances of the instruction.\n"; - OS << " ErrorInfo = ~0ULL;\n"; // Emit code to search the table. OS << " // Find the appropriate table for this asm variant.\n"; @@ -3089,6 +3259,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { "std::equal_range(Start, End, Mnemonic.lower(), LessOpcode());\n\n"; } + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"AsmMatcher: found \" <<\n" + << " std::distance(MnemonicRange.first, MnemonicRange.second) << \n" + << " \" encodings with mnemonic '\" << Mnemonic << \"'\\n\");\n\n"; + OS << " // Return a more specific error code if no mnemonics match.\n"; OS << " if (MnemonicRange.first == MnemonicRange.second)\n"; OS << " return Match_MnemonicFail;\n\n"; @@ -3096,6 +3270,20 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " for (const MatchEntry *it = MnemonicRange.first, " << "*ie = MnemonicRange.second;\n"; OS << " it != ie; ++it) {\n"; + OS << " bool HasRequiredFeatures =\n"; + OS << " (AvailableFeatures & it->RequiredFeatures) == " + "it->RequiredFeatures;\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Trying to match opcode \"\n"; + OS << " << MII.getName(it->Opcode) << \"\\n\");\n"; + + if (ReportMultipleNearMisses) { + OS << " // Some state to record ways in which this instruction did not match.\n"; + OS << " NearMissInfo OperandNearMiss = NearMissInfo::getSuccess();\n"; + OS << " NearMissInfo FeaturesNearMiss = NearMissInfo::getSuccess();\n"; + OS << " NearMissInfo EarlyPredicateNearMiss = NearMissInfo::getSuccess();\n"; + OS << " NearMissInfo LatePredicateNearMiss = NearMissInfo::getSuccess();\n"; + OS << " bool MultipleInvalidOperands = false;\n"; + } if (HasMnemonicFirst) { OS << " // equal_range guarantees that instruction mnemonic matches.\n"; @@ -3103,7 +3291,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } // Emit check that the subclasses match. 
- OS << " bool OperandsValid = true;\n"; + if (!ReportMultipleNearMisses) + OS << " bool OperandsValid = true;\n"; if (HasOptionalOperands) { OS << " OptionalOperandsMask.reset(0, " << MaxNumOperands << ");\n"; } @@ -3112,30 +3301,71 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << "; FormalIdx != " << MaxNumOperands << "; ++FormalIdx) {\n"; OS << " auto Formal = " << "static_cast<MatchClassKind>(it->Classes[FormalIdx]);\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\",\n"; + OS << " dbgs() << \" Matching formal operand class \" << getMatchClassName(Formal)\n"; + OS << " << \" against actual operand at index \" << ActualIdx);\n"; + OS << " if (ActualIdx < Operands.size())\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \" (\";\n"; + OS << " Operands[ActualIdx]->print(dbgs()); dbgs() << \"): \");\n"; + OS << " else\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \": \");\n"; OS << " if (ActualIdx >= Operands.size()) {\n"; - OS << " OperandsValid = (Formal == " <<"InvalidMatchClass) || " - "isSubclass(Formal, OptionalMatchClass);\n"; - OS << " if (!OperandsValid) ErrorInfo = ActualIdx;\n"; - if (HasOptionalOperands) { - OS << " OptionalOperandsMask.set(FormalIdx, " << MaxNumOperands - << ");\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"actual operand index out of range \");\n"; + if (ReportMultipleNearMisses) { + OS << " bool ThisOperandValid = (Formal == " <<"InvalidMatchClass) || " + "isSubclass(Formal, OptionalMatchClass);\n"; + OS << " if (!ThisOperandValid) {\n"; + OS << " if (!OperandNearMiss) {\n"; + OS << " // Record info about match failure for later use.\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"recording too-few-operands near miss\\n\");\n"; + OS << " OperandNearMiss =\n"; + OS << " NearMissInfo::getTooFewOperands(Formal, it->Opcode);\n"; + OS << " } else if (OperandNearMiss.getKind() != NearMissInfo::NearMissTooFewOperands) {\n"; + OS << " // If more than one operand is invalid, give up on this match entry.\n"; + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs() << \"second invalid operand, giving up on this opcode\\n\");\n"; + OS << " MultipleInvalidOperands = true;\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " } else {\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"but formal operand not required\\n\");\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " continue;\n"; + } else { + OS << " OperandsValid = (Formal == InvalidMatchClass) || isSubclass(Formal, OptionalMatchClass);\n"; + OS << " if (!OperandsValid) ErrorInfo = ActualIdx;\n"; + if (HasOptionalOperands) { + OS << " OptionalOperandsMask.set(FormalIdx, " << MaxNumOperands + << ");\n"; + } + OS << " break;\n"; } - OS << " break;\n"; OS << " }\n"; OS << " MCParsedAsmOperand &Actual = *Operands[ActualIdx];\n"; OS << " unsigned Diag = validateOperandClass(Actual, Formal);\n"; OS << " if (Diag == Match_Success) {\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\",\n"; + OS << " dbgs() << \"match success using generic matcher\\n\");\n"; OS << " ++ActualIdx;\n"; OS << " continue;\n"; OS << " }\n"; OS << " // If the generic handler indicates an invalid operand\n"; OS << " // failure, check for a special case.\n"; - OS << " if (Diag == Match_InvalidOperand) {\n"; - OS << " Diag = validateTargetOperandClass(Actual, Formal);\n"; - OS << " if (Diag == Match_Success) {\n"; + OS << " if (Diag != Match_Success) {\n"; + OS << " unsigned TargetDiag = validateTargetOperandClass(Actual, Formal);\n"; + OS << " if (TargetDiag == Match_Success) {\n"; + OS 
<< " DEBUG_WITH_TYPE(\"asm-matcher\",\n"; + OS << " dbgs() << \"match success using target matcher\\n\");\n"; OS << " ++ActualIdx;\n"; OS << " continue;\n"; OS << " }\n"; + OS << " // If the target matcher returned a specific error code use\n"; + OS << " // that, else use the one from the generic matcher.\n"; + OS << " if (TargetDiag != Match_InvalidOperand && " + "HasRequiredFeatures)\n"; + OS << " Diag = TargetDiag;\n"; OS << " }\n"; OS << " // If current formal operand wasn't matched and it is optional\n" << " // then try to match next formal operand\n"; @@ -3144,36 +3374,76 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (HasOptionalOperands) { OS << " OptionalOperandsMask.set(FormalIdx);\n"; } + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"ignoring optional operand\\n\");\n"; OS << " continue;\n"; OS << " }\n"; - OS << " // If this operand is broken for all of the instances of this\n"; - OS << " // mnemonic, keep track of it so we can report loc info.\n"; - OS << " // If we already had a match that only failed due to a\n"; - OS << " // target predicate, that diagnostic is preferred.\n"; - OS << " if (!HadMatchOtherThanPredicate &&\n"; - OS << " (it == MnemonicRange.first || ErrorInfo <= ActualIdx)) {\n"; - OS << " ErrorInfo = ActualIdx;\n"; - OS << " // InvalidOperand is the default. Prefer specificity.\n"; - OS << " if (Diag != Match_InvalidOperand)\n"; - OS << " RetCode = Diag;\n"; - OS << " }\n"; - OS << " // Otherwise, just reject this instance of the mnemonic.\n"; - OS << " OperandsValid = false;\n"; - OS << " break;\n"; - OS << " }\n\n"; - OS << " if (!OperandsValid) continue;\n"; + if (ReportMultipleNearMisses) { + OS << " if (!OperandNearMiss) {\n"; + OS << " // If this is the first invalid operand we have seen, record some\n"; + OS << " // information about it.\n"; + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs()\n"; + OS << " << \"operand match failed, recording near-miss with diag code \"\n"; + OS << " << Diag << \"\\n\");\n"; + OS << " OperandNearMiss =\n"; + OS << " NearMissInfo::getMissedOperand(Diag, Formal, it->Opcode, ActualIdx);\n"; + OS << " ++ActualIdx;\n"; + OS << " } else {\n"; + OS << " // If more than one operand is invalid, give up on this match entry.\n"; + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs() << \"second operand mismatch, skipping this opcode\\n\");\n"; + OS << " MultipleInvalidOperands = true;\n"; + OS << " break;\n"; + OS << " }\n"; + OS << " }\n\n"; + } else { + OS << " // If this operand is broken for all of the instances of this\n"; + OS << " // mnemonic, keep track of it so we can report loc info.\n"; + OS << " // If we already had a match that only failed due to a\n"; + OS << " // target predicate, that diagnostic is preferred.\n"; + OS << " if (!HadMatchOtherThanPredicate &&\n"; + OS << " (it == MnemonicRange.first || ErrorInfo <= ActualIdx)) {\n"; + OS << " if (HasRequiredFeatures && (ErrorInfo != ActualIdx || Diag " + "!= Match_InvalidOperand))\n"; + OS << " RetCode = Diag;\n"; + OS << " ErrorInfo = ActualIdx;\n"; + OS << " }\n"; + OS << " // Otherwise, just reject this instance of the mnemonic.\n"; + OS << " OperandsValid = false;\n"; + OS << " break;\n"; + OS << " }\n\n"; + } + + if (ReportMultipleNearMisses) + OS << " if (MultipleInvalidOperands) {\n"; + else + OS << " if (!OperandsValid) {\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Opcode result: multiple \"\n"; + OS << " \"operand mismatches, ignoring \"\n"; + OS << " \"this opcode\\n\");\n"; + 
OS << " continue;\n"; + OS << " }\n"; // Emit check that the required features are available. - OS << " if ((AvailableFeatures & it->RequiredFeatures) " - << "!= it->RequiredFeatures) {\n"; - OS << " HadMatchOtherThanFeatures = true;\n"; + OS << " if (!HasRequiredFeatures) {\n"; + if (!ReportMultipleNearMisses) + OS << " HadMatchOtherThanFeatures = true;\n"; OS << " uint64_t NewMissingFeatures = it->RequiredFeatures & " "~AvailableFeatures;\n"; - OS << " if (countPopulation(NewMissingFeatures) <=\n" - " countPopulation(MissingFeatures))\n"; - OS << " MissingFeatures = NewMissingFeatures;\n"; - OS << " continue;\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features: \"\n"; + OS << " << format_hex(NewMissingFeatures, 18)\n"; + OS << " << \"\\n\");\n"; + if (ReportMultipleNearMisses) { + OS << " FeaturesNearMiss = NearMissInfo::getMissedFeature(NewMissingFeatures);\n"; + } else { + OS << " if (countPopulation(NewMissingFeatures) <=\n" + " countPopulation(MissingFeatures))\n"; + OS << " MissingFeatures = NewMissingFeatures;\n"; + OS << " continue;\n"; + } OS << " }\n"; OS << "\n"; OS << " Inst.clear();\n\n"; @@ -3189,11 +3459,40 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << " unsigned MatchResult;\n" << " if ((MatchResult = checkEarlyTargetMatchPredicate(Inst, " "Operands)) != Match_Success) {\n" - << " Inst.clear();\n" - << " RetCode = MatchResult;\n" - << " HadMatchOtherThanPredicate = true;\n" - << " continue;\n" - << " }\n\n"; + << " Inst.clear();\n"; + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs() << \"Early target match predicate failed with diag code \"\n"; + OS << " << MatchResult << \"\\n\");\n"; + if (ReportMultipleNearMisses) { + OS << " EarlyPredicateNearMiss = NearMissInfo::getMissedPredicate(MatchResult);\n"; + } else { + OS << " RetCode = MatchResult;\n" + << " HadMatchOtherThanPredicate = true;\n" + << " continue;\n"; + } + OS << " }\n\n"; + + if (ReportMultipleNearMisses) { + OS << " // If we did not successfully match the operands, then we can't convert to\n"; + OS << " // an MCInst, so bail out on this instruction variant now.\n"; + OS << " if (OperandNearMiss) {\n"; + OS << " // If the operand mismatch was the only problem, reprrt it as a near-miss.\n"; + OS << " if (NearMisses && !FeaturesNearMiss && !EarlyPredicateNearMiss) {\n"; + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs()\n"; + OS << " << \"Opcode result: one mismatched operand, adding near-miss\\n\");\n"; + OS << " NearMisses->push_back(OperandNearMiss);\n"; + OS << " } else {\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Opcode result: multiple \"\n"; + OS << " \"types of mismatch, so not \"\n"; + OS << " \"reporting near-miss\\n\");\n"; + OS << " }\n"; + OS << " continue;\n"; + OS << " }\n\n"; + } + OS << " if (matchingInlineAsm) {\n"; OS << " convertToMapAndConstraints(it->ConvertFn, Operands);\n"; OS << " return Match_Success;\n"; @@ -3213,11 +3512,46 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << " // handle any context sensitive constraints.\n" << " if ((MatchResult = checkTargetMatchPredicate(Inst)) !=" << " Match_Success) {\n" - << " Inst.clear();\n" - << " RetCode = MatchResult;\n" - << " HadMatchOtherThanPredicate = true;\n" - << " continue;\n" - << " }\n\n"; + << " DEBUG_WITH_TYPE(\"asm-matcher\",\n" + << " dbgs() << \"Target match predicate failed with diag code \"\n" + << " << MatchResult << \"\\n\");\n" + << " Inst.clear();\n"; + if (ReportMultipleNearMisses) { + OS << " 
LatePredicateNearMiss = NearMissInfo::getMissedPredicate(MatchResult);\n"; + } else { + OS << " RetCode = MatchResult;\n" + << " HadMatchOtherThanPredicate = true;\n" + << " continue;\n"; + } + OS << " }\n\n"; + + if (ReportMultipleNearMisses) { + OS << " int NumNearMisses = ((int)(bool)OperandNearMiss +\n"; + OS << " (int)(bool)FeaturesNearMiss +\n"; + OS << " (int)(bool)EarlyPredicateNearMiss +\n"; + OS << " (int)(bool)LatePredicateNearMiss);\n"; + OS << " if (NumNearMisses == 1) {\n"; + OS << " // We had exactly one type of near-miss, so add that to the list.\n"; + OS << " assert(!OperandNearMiss && \"OperandNearMiss was handled earlier\");\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Opcode result: found one type of \"\n"; + OS << " \"mismatch, so reporting a \"\n"; + OS << " \"near-miss\\n\");\n"; + OS << " if (NearMisses && FeaturesNearMiss)\n"; + OS << " NearMisses->push_back(FeaturesNearMiss);\n"; + OS << " else if (NearMisses && EarlyPredicateNearMiss)\n"; + OS << " NearMisses->push_back(EarlyPredicateNearMiss);\n"; + OS << " else if (NearMisses && LatePredicateNearMiss)\n"; + OS << " NearMisses->push_back(LatePredicateNearMiss);\n"; + OS << "\n"; + OS << " continue;\n"; + OS << " } else if (NumNearMisses > 1) {\n"; + OS << " // This instruction missed in more than one way, so ignore it.\n"; + OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Opcode result: multiple \"\n"; + OS << " \"types of mismatch, so not \"\n"; + OS << " \"reporting near-miss\\n\");\n"; + OS << " continue;\n"; + OS << " }\n"; + } // Call the post-processing function, if used. StringRef InsnCleanupFn = AsmParser->getValueAsString("AsmParserInstCleanup"); @@ -3235,15 +3569,23 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " }\n"; } + OS << " DEBUG_WITH_TYPE(\n"; + OS << " \"asm-matcher\",\n"; + OS << " dbgs() << \"Opcode result: complete match, selecting this opcode\\n\");\n"; OS << " return Match_Success;\n"; OS << " }\n\n"; - OS << " // Okay, we had no match. Try to return a useful error code.\n"; - OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n"; - OS << " return RetCode;\n\n"; - OS << " // Missing feature matches return which features were missing\n"; - OS << " ErrorInfo = MissingFeatures;\n"; - OS << " return Match_MissingFeature;\n"; + if (ReportMultipleNearMisses) { + OS << " // No instruction variants matched exactly.\n"; + OS << " return Match_NearMisses;\n"; + } else { + OS << " // Okay, we had no match. 
Try to return a useful error code.\n"; + OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n"; + OS << " return RetCode;\n\n"; + OS << " // Missing feature matches return which features were missing\n"; + OS << " ErrorInfo = MissingFeatures;\n"; + OS << " return Match_MissingFeature;\n"; + } OS << "}\n\n"; if (!Info.OperandMatchInfo.empty()) @@ -3251,6 +3593,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { MaxMnemonicIndex, HasMnemonicFirst); OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n"; + + OS << "\n#ifdef GET_MNEMONIC_SPELL_CHECKER\n"; + OS << "#undef GET_MNEMONIC_SPELL_CHECKER\n\n"; + + emitMnemonicSpellChecker(OS, Target, VariantCount); + + OS << "#endif // GET_MNEMONIC_SPELL_CHECKER\n\n"; } namespace llvm { diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 75b9bc6cca40..723c0cd773f7 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -820,8 +820,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { } unsigned NumMIOps = 0; - for (auto &Operand : CGA.ResultOperands) - NumMIOps += Operand.getMINumOperands(); + for (auto &ResultInstOpnd : CGA.ResultInst->Operands) + NumMIOps += ResultInstOpnd.MINumOperands; std::string Cond; Cond = std::string("MI->getNumOperands() == ") + utostr(NumMIOps); @@ -831,6 +831,19 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { unsigned MIOpNum = 0; for (unsigned i = 0, e = LastOpNo; i != e; ++i) { + // Skip over tied operands as they're not part of an alias declaration. + auto &Operands = CGA.ResultInst->Operands; + unsigned OpNum = Operands.getSubOperandNumber(MIOpNum).first; + if (Operands[OpNum].MINumOperands == 1 && + Operands[OpNum].getTiedRegister() != -1) { + // Tied operands of different RegisterClass should be explicit within + // an instruction's syntax and so cannot be skipped. 
+ int TiedOpNum = Operands[OpNum].getTiedRegister(); + if (Operands[OpNum].Rec->getName() == + Operands[TiedOpNum].Rec->getName()) + ++MIOpNum; + } + std::string Op = "MI->getOperand(" + utostr(MIOpNum) + ")"; const CodeGenInstAlias::ResultOperand &RO = CGA.ResultOperands[i]; @@ -1024,8 +1037,10 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { O << " OS << '\\t' << StringRef(AsmString, I);\n"; O << " if (AsmString[I] != '\\0') {\n"; - O << " if (AsmString[I] == ' ' || AsmString[I] == '\\t')"; + O << " if (AsmString[I] == ' ' || AsmString[I] == '\\t') {\n"; O << " OS << '\\t';\n"; + O << " ++I;\n"; + O << " }\n"; O << " do {\n"; O << " if (AsmString[I] == '$') {\n"; O << " ++I;\n"; diff --git a/utils/TableGen/Attributes.cpp b/utils/TableGen/Attributes.cpp index d64d30e18c3e..6bfc0ab896f9 100644 --- a/utils/TableGen/Attributes.cpp +++ b/utils/TableGen/Attributes.cpp @@ -7,9 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include <algorithm> #include <string> diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index b2913afae12a..c84f4925aa78 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -8,6 +8,7 @@ add_tablegen(llvm-tblgen LLVM CallingConvEmitter.cpp CodeEmitterGen.cpp CodeGenDAGPatterns.cpp + CodeGenHwModes.cpp CodeGenInstruction.cpp CodeGenMapTable.cpp CodeGenRegisters.cpp @@ -23,7 +24,9 @@ add_tablegen(llvm-tblgen LLVM FastISelEmitter.cpp FixedLenDecoderEmitter.cpp GlobalISelEmitter.cpp + InfoByHwMode.cpp InstrInfoEmitter.cpp + InstrDocsEmitter.cpp IntrinsicEmitter.cpp OptParserEmitter.cpp PseudoLoweringEmitter.cpp @@ -36,7 +39,9 @@ add_tablegen(llvm-tblgen LLVM Types.cpp X86DisassemblerTables.cpp X86EVEX2VEXTablesEmitter.cpp + X86FoldTablesEmitter.cpp X86ModRMFilters.cpp X86RecognizableInstr.cpp CTagsEmitter.cpp ) +set_target_properties(llvm-tblgen PROPERTIES FOLDER "Tablegenning") diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index 013e96065752..d452031f8850 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -39,21 +39,21 @@ void CallingConvEmitter::run(raw_ostream &O) { // Emit prototypes for all of the non-custom CC's so that they can forward ref // each other. - for (unsigned i = 0, e = CCs.size(); i != e; ++i) { - if (!CCs[i]->getValueAsBit("Custom")) { - O << "static bool " << CCs[i]->getName() + for (Record *CC : CCs) { + if (!CC->getValueAsBit("Custom")) { + O << "static bool " << CC->getName() << "(unsigned ValNo, MVT ValVT,\n" - << std::string(CCs[i]->getName().size() + 13, ' ') + << std::string(CC->getName().size() + 13, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" - << std::string(CCs[i]->getName().size() + 13, ' ') + << std::string(CC->getName().size() + 13, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State);\n"; } } // Emit each non-custom calling convention description in full. 
- for (unsigned i = 0, e = CCs.size(); i != e; ++i) { - if (!CCs[i]->getValueAsBit("Custom")) - EmitCallingConv(CCs[i], O); + for (Record *CC : CCs) { + if (!CC->getValueAsBit("Custom")) + EmitCallingConv(CC, O); } } diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index e48ba3845326..51473f06da79 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -13,9 +13,12 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -28,752 +31,1116 @@ using namespace llvm; #define DEBUG_TYPE "dag-patterns" -//===----------------------------------------------------------------------===// -// EEVT::TypeSet Implementation -//===----------------------------------------------------------------------===// - -static inline bool isInteger(MVT::SimpleValueType VT) { - return MVT(VT).isInteger(); +static inline bool isIntegerOrPtr(MVT VT) { + return VT.isInteger() || VT == MVT::iPTR; } -static inline bool isFloatingPoint(MVT::SimpleValueType VT) { - return MVT(VT).isFloatingPoint(); +static inline bool isFloatingPoint(MVT VT) { + return VT.isFloatingPoint(); } -static inline bool isVector(MVT::SimpleValueType VT) { - return MVT(VT).isVector(); +static inline bool isVector(MVT VT) { + return VT.isVector(); } -static inline bool isScalar(MVT::SimpleValueType VT) { - return !MVT(VT).isVector(); +static inline bool isScalar(MVT VT) { + return !VT.isVector(); } -EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) { - if (VT == MVT::iAny) - EnforceInteger(TP); - else if (VT == MVT::fAny) - EnforceFloatingPoint(TP); - else if (VT == MVT::vAny) - EnforceVector(TP); - else { - assert((VT < MVT::LAST_VALUETYPE || VT == MVT::iPTR || - VT == MVT::iPTRAny || VT == MVT::Any) && "Not a concrete type!"); - TypeVec.push_back(VT); +template <typename Predicate> +static bool berase_if(MachineValueTypeSet &S, Predicate P) { + bool Erased = false; + // It is ok to iterate over MachineValueTypeSet and remove elements from it + // at the same time. + for (MVT T : S) { + if (!P(T)) + continue; + Erased = true; + S.erase(T); } + return Erased; } +// --- TypeSetByHwMode -EEVT::TypeSet::TypeSet(ArrayRef<MVT::SimpleValueType> VTList) { - assert(!VTList.empty() && "empty list?"); - TypeVec.append(VTList.begin(), VTList.end()); - - if (!VTList.empty()) - assert(VTList[0] != MVT::iAny && VTList[0] != MVT::vAny && - VTList[0] != MVT::fAny); +// This is a parameterized type-set class. For each mode there is a list +// of types that are currently possible for a given tree node. Type +// inference will apply to each mode separately. - // Verify no duplicates. - array_pod_sort(TypeVec.begin(), TypeVec.end()); - assert(std::unique(TypeVec.begin(), TypeVec.end()) == TypeVec.end()); +TypeSetByHwMode::TypeSetByHwMode(ArrayRef<ValueTypeByHwMode> VTList) { + for (const ValueTypeByHwMode &VVT : VTList) + insert(VVT); } -/// FillWithPossibleTypes - Set to all legal types and return true, only valid -/// on completely unknown type sets. 
-bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP, - bool (*Pred)(MVT::SimpleValueType), - const char *PredicateName) { - assert(isCompletelyUnknown()); - ArrayRef<MVT::SimpleValueType> LegalTypes = - TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); - - if (TP.hasError()) - return false; - - for (MVT::SimpleValueType VT : LegalTypes) - if (!Pred || Pred(VT)) - TypeVec.push_back(VT); - - // If we have nothing that matches the predicate, bail out. - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, no " + - std::string(PredicateName) + " types found"); - return false; +bool TypeSetByHwMode::isValueTypeByHwMode(bool AllowEmpty) const { + for (const auto &I : *this) { + if (I.second.size() > 1) + return false; + if (!AllowEmpty && I.second.empty()) + return false; } - // No need to sort with one element. - if (TypeVec.size() == 1) return true; - - // Remove duplicates. - array_pod_sort(TypeVec.begin(), TypeVec.end()); - TypeVec.erase(std::unique(TypeVec.begin(), TypeVec.end()), TypeVec.end()); - return true; } -/// hasIntegerTypes - Return true if this TypeSet contains iAny or an -/// integer value type. -bool EEVT::TypeSet::hasIntegerTypes() const { - return any_of(TypeVec, isInteger); +ValueTypeByHwMode TypeSetByHwMode::getValueTypeByHwMode() const { + assert(isValueTypeByHwMode(true) && + "The type set has multiple types for at least one HW mode"); + ValueTypeByHwMode VVT; + for (const auto &I : *this) { + MVT T = I.second.empty() ? MVT::Other : *I.second.begin(); + VVT.getOrCreateTypeForMode(I.first, T); + } + return VVT; } -/// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or -/// a floating point value type. -bool EEVT::TypeSet::hasFloatingPointTypes() const { - return any_of(TypeVec, isFloatingPoint); +bool TypeSetByHwMode::isPossible() const { + for (const auto &I : *this) + if (!I.second.empty()) + return true; + return false; } -/// hasScalarTypes - Return true if this TypeSet contains a scalar value type. -bool EEVT::TypeSet::hasScalarTypes() const { - return any_of(TypeVec, isScalar); +bool TypeSetByHwMode::insert(const ValueTypeByHwMode &VVT) { + bool Changed = false; + SmallDenseSet<unsigned, 4> Modes; + for (const auto &P : VVT) { + unsigned M = P.first; + Modes.insert(M); + // Make sure there exists a set for each specific mode from VVT. + Changed |= getOrCreate(M).insert(P.second).second; + } + + // If VVT has a default mode, add the corresponding type to all + // modes in "this" that do not exist in VVT. + if (Modes.count(DefaultMode)) { + MVT DT = VVT.getType(DefaultMode); + for (auto &I : *this) + if (!Modes.count(I.first)) + Changed |= I.second.insert(DT).second; + } + return Changed; } -/// hasVectorTypes - Return true if this TypeSet contains a vAny or a vector -/// value type. -bool EEVT::TypeSet::hasVectorTypes() const { - return any_of(TypeVec, isVector); +// Constrain the type set to be the intersection with VTS. 
+bool TypeSetByHwMode::constrain(const TypeSetByHwMode &VTS) { + bool Changed = false; + if (hasDefault()) { + for (const auto &I : VTS) { + unsigned M = I.first; + if (M == DefaultMode || hasMode(M)) + continue; + Map.insert({M, Map.at(DefaultMode)}); + Changed = true; + } + } + + for (auto &I : *this) { + unsigned M = I.first; + SetType &S = I.second; + if (VTS.hasMode(M) || VTS.hasDefault()) { + Changed |= intersect(I.second, VTS.get(M)); + } else if (!S.empty()) { + S.clear(); + Changed = true; + } + } + return Changed; } +template <typename Predicate> +bool TypeSetByHwMode::constrain(Predicate P) { + bool Changed = false; + for (auto &I : *this) + Changed |= berase_if(I.second, [&P](MVT VT) { return !P(VT); }); + return Changed; +} -std::string EEVT::TypeSet::getName() const { - if (TypeVec.empty()) return "<empty>"; +template <typename Predicate> +bool TypeSetByHwMode::assign_if(const TypeSetByHwMode &VTS, Predicate P) { + assert(empty()); + for (const auto &I : VTS) { + SetType &S = getOrCreate(I.first); + for (auto J : I.second) + if (P(J)) + S.insert(J); + } + return !empty(); +} - std::string Result; +void TypeSetByHwMode::writeToStream(raw_ostream &OS) const { + SmallVector<unsigned, 4> Modes; + Modes.reserve(Map.size()); - for (unsigned i = 0, e = TypeVec.size(); i != e; ++i) { - std::string VTName = llvm::getEnumName(TypeVec[i]); - // Strip off MVT:: prefix if present. - if (VTName.substr(0,5) == "MVT::") - VTName = VTName.substr(5); - if (i) Result += ':'; - Result += VTName; + for (const auto &I : *this) + Modes.push_back(I.first); + if (Modes.empty()) { + OS << "{}"; + return; } + array_pod_sort(Modes.begin(), Modes.end()); - if (TypeVec.size() == 1) - return Result; - return "{" + Result + "}"; + OS << '{'; + for (unsigned M : Modes) { + OS << ' ' << getModeName(M) << ':'; + writeToStream(get(M), OS); + } + OS << " }"; } -/// MergeInTypeInfo - This merges in type information from the specified -/// argument. If 'this' changes, it returns true. If the two types are -/// contradictory (e.g. merge f32 into i32) then this flags an error. -bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){ - if (InVT.isCompletelyUnknown() || *this == InVT || TP.hasError()) - return false; +void TypeSetByHwMode::writeToStream(const SetType &S, raw_ostream &OS) { + SmallVector<MVT, 4> Types(S.begin(), S.end()); + array_pod_sort(Types.begin(), Types.end()); - if (isCompletelyUnknown()) { - *this = InVT; - return true; + OS << '['; + for (unsigned i = 0, e = Types.size(); i != e; ++i) { + OS << ValueTypeByHwMode::getMVTName(Types[i]); + if (i != e-1) + OS << ' '; } + OS << ']'; +} - assert(!TypeVec.empty() && !InVT.TypeVec.empty() && "No unknowns"); +bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const { + bool HaveDefault = hasDefault(); + if (HaveDefault != VTS.hasDefault()) + return false; - // Handle the abstract cases, seeing if we can resolve them better. - switch (TypeVec[0]) { - default: break; - case MVT::iPTR: - case MVT::iPTRAny: - if (InVT.hasIntegerTypes()) { - EEVT::TypeSet InCopy(InVT); - InCopy.EnforceInteger(TP); - InCopy.EnforceScalar(TP); + if (isSimple()) { + if (VTS.isSimple()) + return *begin() == *VTS.begin(); + return false; + } - if (InCopy.isConcrete()) { - // If the RHS has one integer type, upgrade iPTR to i32. 
- TypeVec[0] = InVT.TypeVec[0]; - return true; - } + SmallDenseSet<unsigned, 4> Modes; + for (auto &I : *this) + Modes.insert(I.first); + for (const auto &I : VTS) + Modes.insert(I.first); - // If the input has multiple scalar integers, this doesn't add any info. - if (!InCopy.isCompletelyUnknown()) + if (HaveDefault) { + // Both sets have default mode. + for (unsigned M : Modes) { + if (get(M) != VTS.get(M)) + return false; + } + } else { + // Neither set has default mode. + for (unsigned M : Modes) { + // If there is no default mode, an empty set is equivalent to not having + // the corresponding mode. + bool NoModeThis = !hasMode(M) || get(M).empty(); + bool NoModeVTS = !VTS.hasMode(M) || VTS.get(M).empty(); + if (NoModeThis != NoModeVTS) return false; + if (!NoModeThis) + if (get(M) != VTS.get(M)) + return false; } - break; } - // If the input constraint is iAny/iPTR and this is an integer type list, - // remove non-integer types from the list. - if ((InVT.TypeVec[0] == MVT::iPTR || InVT.TypeVec[0] == MVT::iPTRAny) && - hasIntegerTypes()) { - bool MadeChange = EnforceInteger(TP); + return true; +} - // If we're merging in iPTR/iPTRAny and the node currently has a list of - // multiple different integer types, replace them with a single iPTR. - if ((InVT.TypeVec[0] == MVT::iPTR || InVT.TypeVec[0] == MVT::iPTRAny) && - TypeVec.size() != 1) { - TypeVec.assign(1, InVT.TypeVec[0]); - MadeChange = true; - } +namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T) { + T.writeToStream(OS); + return OS; + } +} - return MadeChange; +LLVM_DUMP_METHOD +void TypeSetByHwMode::dump() const { + dbgs() << *this << '\n'; +} + +bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) { + bool OutP = Out.count(MVT::iPTR), InP = In.count(MVT::iPTR); + auto Int = [&In](MVT T) -> bool { return !In.count(T); }; + + if (OutP == InP) + return berase_if(Out, Int); + + // Compute the intersection of scalars separately to account for only + // one set containing iPTR. + // The itersection of iPTR with a set of integer scalar types that does not + // include iPTR will result in the most specific scalar type: + // - iPTR is more specific than any set with two elements or more + // - iPTR is less specific than any single integer scalar type. + // For example + // { iPTR } * { i32 } -> { i32 } + // { iPTR } * { i32 i64 } -> { iPTR } + // and + // { iPTR i32 } * { i32 } -> { i32 } + // { iPTR i32 } * { i32 i64 } -> { i32 i64 } + // { iPTR i32 } * { i32 i64 i128 } -> { iPTR i32 } + + // Compute the difference between the two sets in such a way that the + // iPTR is in the set that is being subtracted. This is to see if there + // are any extra scalars in the set without iPTR that are not in the + // set containing iPTR. Then the iPTR could be considered a "wildcard" + // matching these scalars. If there is only one such scalar, it would + // replace the iPTR, if there are more, the iPTR would be retained. + SetType Diff; + if (InP) { + Diff = Out; + berase_if(Diff, [&In](MVT T) { return In.count(T); }); + // Pre-remove these elements and rely only on InP/OutP to determine + // whether a change has been made. + berase_if(Out, [&Diff](MVT T) { return Diff.count(T); }); + } else { + Diff = In; + berase_if(Diff, [&Out](MVT T) { return Out.count(T); }); + Out.erase(MVT::iPTR); + } + + // The actual intersection. 
+ bool Changed = berase_if(Out, Int); + unsigned NumD = Diff.size(); + if (NumD == 0) + return Changed; + + if (NumD == 1) { + Out.insert(*Diff.begin()); + // This is a change only if Out was the one with iPTR (which is now + // being replaced). + Changed |= OutP; + } else { + // Multiple elements from Out are now replaced with iPTR. + Out.insert(MVT::iPTR); + Changed |= !OutP; } + return Changed; +} - // If this is a type list and the RHS is a typelist as well, eliminate entries - // from this list that aren't in the other one. - TypeSet InputSet(*this); +void TypeSetByHwMode::validate() const { +#ifndef NDEBUG + if (empty()) + return; + bool AllEmpty = true; + for (const auto &I : *this) + AllEmpty &= I.second.empty(); + assert(!AllEmpty && + "type set is empty for each HW mode: type contradiction?"); +#endif +} - TypeVec.clear(); - std::set_intersection(InputSet.TypeVec.begin(), InputSet.TypeVec.end(), - InVT.TypeVec.begin(), InVT.TypeVec.end(), - std::back_inserter(TypeVec)); +// --- TypeInfer - // If the intersection is the same size as the original set then we're done. - if (TypeVec.size() == InputSet.TypeVec.size()) +bool TypeInfer::MergeInTypeInfo(TypeSetByHwMode &Out, + const TypeSetByHwMode &In) { + ValidateOnExit _1(Out); + In.validate(); + if (In.empty() || Out == In || TP.hasError()) return false; - - // If we removed all of our types, we have a type contradiction. - if (!TypeVec.empty()) + if (Out.empty()) { + Out = In; return true; + } - // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, merging '" + - InVT.getName() + "' into '" + InputSet.getName() + "'"); - return false; + bool Changed = Out.constrain(In); + if (Changed && Out.empty()) + TP.error("Type contradiction"); + + return Changed; } -/// EnforceInteger - Remove all non-integer types from this set. -bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) { +bool TypeInfer::forceArbitrary(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); if (TP.hasError()) return false; - // If we know nothing, then get the full set. - if (TypeVec.empty()) - return FillWithPossibleTypes(TP, isInteger, "integer"); - - if (!hasFloatingPointTypes()) - return false; + assert(!Out.empty() && "cannot pick from an empty set"); - TypeSet InputSet(*this); - - // Filter out all the fp types. - TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isInteger))), - TypeVec.end()); - - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + - InputSet.getName() + "' needs to be integer"); - return false; + bool Changed = false; + for (auto &I : Out) { + TypeSetByHwMode::SetType &S = I.second; + if (S.size() <= 1) + continue; + MVT T = *S.begin(); // Pick the first element. + S.clear(); + S.insert(T); + Changed = true; } - return true; + return Changed; } -/// EnforceFloatingPoint - Remove all integer types from this set. -bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) { +bool TypeInfer::EnforceInteger(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); if (TP.hasError()) return false; - // If we know nothing, then get the full set. 
- if (TypeVec.empty()) - return FillWithPossibleTypes(TP, isFloatingPoint, "floating point"); + if (!Out.empty()) + return Out.constrain(isIntegerOrPtr); - if (!hasIntegerTypes()) - return false; + return Out.assign_if(getLegalTypes(), isIntegerOrPtr); +} - TypeSet InputSet(*this); +bool TypeInfer::EnforceFloatingPoint(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); + if (TP.hasError()) + return false; + if (!Out.empty()) + return Out.constrain(isFloatingPoint); - // Filter out all the integer types. - TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isFloatingPoint))), - TypeVec.end()); + return Out.assign_if(getLegalTypes(), isFloatingPoint); +} - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + - InputSet.getName() + "' needs to be floating point"); +bool TypeInfer::EnforceScalar(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); + if (TP.hasError()) return false; - } - return true; + if (!Out.empty()) + return Out.constrain(isScalar); + + return Out.assign_if(getLegalTypes(), isScalar); } -/// EnforceScalar - Remove all vector types from this. -bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) { +bool TypeInfer::EnforceVector(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); if (TP.hasError()) return false; + if (!Out.empty()) + return Out.constrain(isVector); - // If we know nothing, then get the full set. - if (TypeVec.empty()) - return FillWithPossibleTypes(TP, isScalar, "scalar"); + return Out.assign_if(getLegalTypes(), isVector); +} - if (!hasVectorTypes()) +bool TypeInfer::EnforceAny(TypeSetByHwMode &Out) { + ValidateOnExit _1(Out); + if (TP.hasError() || !Out.empty()) return false; - TypeSet InputSet(*this); + Out = getLegalTypes(); + return true; +} - // Filter out all the vector types. - TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isScalar))), - TypeVec.end()); +template <typename Iter, typename Pred, typename Less> +static Iter min_if(Iter B, Iter E, Pred P, Less L) { + if (B == E) + return E; + Iter Min = E; + for (Iter I = B; I != E; ++I) { + if (!P(*I)) + continue; + if (Min == E || L(*I, *Min)) + Min = I; + } + return Min; +} - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + - InputSet.getName() + "' needs to be scalar"); - return false; +template <typename Iter, typename Pred, typename Less> +static Iter max_if(Iter B, Iter E, Pred P, Less L) { + if (B == E) + return E; + Iter Max = E; + for (Iter I = B; I != E; ++I) { + if (!P(*I)) + continue; + if (Max == E || L(*Max, *I)) + Max = I; } - return true; + return Max; } -/// EnforceVector - Remove all vector types from this. -bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { +/// Make sure that for each type in Small, there exists a larger type in Big. +bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small, + TypeSetByHwMode &Big) { + ValidateOnExit _1(Small), _2(Big); if (TP.hasError()) return false; + bool Changed = false; + + if (Small.empty()) + Changed |= EnforceAny(Small); + if (Big.empty()) + Changed |= EnforceAny(Big); + + assert(Small.hasDefault() && Big.hasDefault()); + + std::vector<unsigned> Modes = union_modes(Small, Big); + + // 1. Only allow integer or floating point types and make sure that + // both sides are both integer or both floating point. + // 2. Make sure that either both sides have vector types, or neither + // of them does. 
+ for (unsigned M : Modes) { + TypeSetByHwMode::SetType &S = Small.get(M); + TypeSetByHwMode::SetType &B = Big.get(M); + + if (any_of(S, isIntegerOrPtr) && any_of(S, isIntegerOrPtr)) { + auto NotInt = [](MVT VT) { return !isIntegerOrPtr(VT); }; + Changed |= berase_if(S, NotInt) | + berase_if(B, NotInt); + } else if (any_of(S, isFloatingPoint) && any_of(B, isFloatingPoint)) { + auto NotFP = [](MVT VT) { return !isFloatingPoint(VT); }; + Changed |= berase_if(S, NotFP) | + berase_if(B, NotFP); + } else if (S.empty() || B.empty()) { + Changed = !S.empty() || !B.empty(); + S.clear(); + B.clear(); + } else { + TP.error("Incompatible types"); + return Changed; + } - // If we know nothing, then get the full set. - if (TypeVec.empty()) - return FillWithPossibleTypes(TP, isVector, "vector"); - - TypeSet InputSet(*this); - bool MadeChange = false; - - // Filter out all the scalar types. - TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isVector))), - TypeVec.end()); - - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + - InputSet.getName() + "' needs to be a vector"); - return false; + if (none_of(S, isVector) || none_of(B, isVector)) { + Changed |= berase_if(S, isVector) | + berase_if(B, isVector); + } } - return MadeChange; -} + auto LT = [](MVT A, MVT B) -> bool { + return A.getScalarSizeInBits() < B.getScalarSizeInBits() || + (A.getScalarSizeInBits() == B.getScalarSizeInBits() && + A.getSizeInBits() < B.getSizeInBits()); + }; + auto LE = [](MVT A, MVT B) -> bool { + // This function is used when removing elements: when a vector is compared + // to a non-vector, it should return false (to avoid removal). + if (A.isVector() != B.isVector()) + return false; + // Note on the < comparison below: + // X86 has patterns like + // (set VR128X:$dst, (v16i8 (X86vtrunc (v4i32 VR128X:$src1)))), + // where the truncated vector is given a type v16i8, while the source + // vector has type v4i32. They both have the same size in bits. + // The minimal type in the result is obviously v16i8, and when we remove + // all types from the source that are smaller-or-equal than v8i16, the + // only source type would also be removed (since it's equal in size). + return A.getScalarSizeInBits() <= B.getScalarSizeInBits() || + A.getSizeInBits() < B.getSizeInBits(); + }; + + for (unsigned M : Modes) { + TypeSetByHwMode::SetType &S = Small.get(M); + TypeSetByHwMode::SetType &B = Big.get(M); + // MinS = min scalar in Small, remove all scalars from Big that are + // smaller-or-equal than MinS. + auto MinS = min_if(S.begin(), S.end(), isScalar, LT); + if (MinS != S.end()) + Changed |= berase_if(B, std::bind(LE, std::placeholders::_1, *MinS)); + + // MaxS = max scalar in Big, remove all scalars from Small that are + // larger than MaxS. + auto MaxS = max_if(B.begin(), B.end(), isScalar, LT); + if (MaxS != B.end()) + Changed |= berase_if(S, std::bind(LE, *MaxS, std::placeholders::_1)); + + // MinV = min vector in Small, remove all vectors from Big that are + // smaller-or-equal than MinV. + auto MinV = min_if(S.begin(), S.end(), isVector, LT); + if (MinV != S.end()) + Changed |= berase_if(B, std::bind(LE, std::placeholders::_1, *MinV)); + + // MaxV = max vector in Big, remove all vectors from Small that are + // larger than MaxV. + auto MaxV = max_if(B.begin(), B.end(), isVector, LT); + if (MaxV != B.end()) + Changed |= berase_if(S, std::bind(LE, *MaxV, std::placeholders::_1)); + } + + return Changed; +} -/// EnforceSmallerThan - 'this' must be a smaller VT than Other. 
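The pruning step of EnforceSmallerThan above boils down to: erase from Big everything that is not strictly larger than the minimum of Small, and erase from Small everything that is not strictly smaller than the maximum of Big. A simplified standalone sketch of that idea, with plain bit-widths standing in for MVTs and the per-mode and scalar/vector splits ignored:

#include <cassert>
#include <set>

// Toy version of the scalar pruning in EnforceSmallerThan: every width in
// Small must be strictly smaller than some width in Big.
static bool prune(std::set<unsigned> &Small, std::set<unsigned> &Big) {
  bool Changed = false;
  unsigned MinS = *Small.begin();             // std::set is ordered
  unsigned MaxB = *Big.rbegin();
  for (auto I = Big.begin(); I != Big.end(); )
    if (*I <= MinS) { I = Big.erase(I); Changed = true; } else ++I;
  for (auto I = Small.begin(); I != Small.end(); )
    if (*I >= MaxB) { I = Small.erase(I); Changed = true; } else ++I;
  return Changed;
}

int main() {
  std::set<unsigned> Small = {8, 16, 32, 64};
  std::set<unsigned> Big   = {16, 32};
  prune(Small, Big);
  assert((Small == std::set<unsigned>{8, 16}));   // must be < 32
  assert((Big   == std::set<unsigned>{16, 32}));  // nothing <= 8 to drop
  return 0;
}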
For vectors -/// this should be based on the element type. Update this and other based on -/// this information. -bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { +/// 1. Ensure that for each type T in Vec, T is a vector type, and that +/// for each type U in Elem, U is a scalar type. +/// 2. Ensure that for each (scalar) type U in Elem, there exists a (vector) +/// type T in Vec, such that U is the element type of T. +bool TypeInfer::EnforceVectorEltTypeIs(TypeSetByHwMode &Vec, + TypeSetByHwMode &Elem) { + ValidateOnExit _1(Vec), _2(Elem); if (TP.hasError()) return false; + bool Changed = false; + + if (Vec.empty()) + Changed |= EnforceVector(Vec); + if (Elem.empty()) + Changed |= EnforceScalar(Elem); + + for (unsigned M : union_modes(Vec, Elem)) { + TypeSetByHwMode::SetType &V = Vec.get(M); + TypeSetByHwMode::SetType &E = Elem.get(M); + + Changed |= berase_if(V, isScalar); // Scalar = !vector + Changed |= berase_if(E, isVector); // Vector = !scalar + assert(!V.empty() && !E.empty()); + + SmallSet<MVT,4> VT, ST; + // Collect element types from the "vector" set. + for (MVT T : V) + VT.insert(T.getVectorElementType()); + // Collect scalar types from the "element" set. + for (MVT T : E) + ST.insert(T); + + // Remove from V all (vector) types whose element type is not in S. + Changed |= berase_if(V, [&ST](MVT T) -> bool { + return !ST.count(T.getVectorElementType()); + }); + // Remove from E all (scalar) types, for which there is no corresponding + // type in V. + Changed |= berase_if(E, [&VT](MVT T) -> bool { return !VT.count(T); }); + } + + return Changed; +} - // Both operands must be integer or FP, but we don't care which. - bool MadeChange = false; - - if (isCompletelyUnknown()) - MadeChange = FillWithPossibleTypes(TP); - - if (Other.isCompletelyUnknown()) - MadeChange = Other.FillWithPossibleTypes(TP); - - // If one side is known to be integer or known to be FP but the other side has - // no information, get at least the type integrality info in there. - if (!hasFloatingPointTypes()) - MadeChange |= Other.EnforceInteger(TP); - else if (!hasIntegerTypes()) - MadeChange |= Other.EnforceFloatingPoint(TP); - if (!Other.hasFloatingPointTypes()) - MadeChange |= EnforceInteger(TP); - else if (!Other.hasIntegerTypes()) - MadeChange |= EnforceFloatingPoint(TP); - - assert(!isCompletelyUnknown() && !Other.isCompletelyUnknown() && - "Should have a type list now"); - - // If one contains vectors but the other doesn't pull vectors out. - if (!hasVectorTypes()) - MadeChange |= Other.EnforceScalar(TP); - else if (!hasScalarTypes()) - MadeChange |= Other.EnforceVector(TP); - if (!Other.hasVectorTypes()) - MadeChange |= EnforceScalar(TP); - else if (!Other.hasScalarTypes()) - MadeChange |= EnforceVector(TP); - - // This code does not currently handle nodes which have multiple types, - // where some types are integer, and some are fp. Assert that this is not - // the case. - assert(!(hasIntegerTypes() && hasFloatingPointTypes()) && - !(Other.hasIntegerTypes() && Other.hasFloatingPointTypes()) && - "SDTCisOpSmallerThanOp does not handle mixed int/fp types!"); +bool TypeInfer::EnforceVectorEltTypeIs(TypeSetByHwMode &Vec, + const ValueTypeByHwMode &VVT) { + TypeSetByHwMode Tmp(VVT); + ValidateOnExit _1(Vec), _2(Tmp); + return EnforceVectorEltTypeIs(Vec, Tmp); +} +/// Ensure that for each type T in Sub, T is a vector type, and there +/// exists a type U in Vec such that U is a vector type with the same +/// element type as T and at least as many elements as T. 
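EnforceVectorEltTypeIs above is essentially a two-way intersection: keep only the vectors whose element type appears in the scalar set, and only the scalars that are the element type of some remaining vector. A standalone sketch of that step, modelling a vector type as an (element name, element count) pair rather than an MVT:

#include <cassert>
#include <set>
#include <string>
#include <utility>

using VecTy = std::pair<std::string, unsigned>;   // e.g. {"i32", 4} ~ v4i32

// Keep only vectors whose element type is in Elem, and only scalars that are
// the element type of some vector in Vec (per-mode handling omitted).
static bool intersectEltTypes(std::set<VecTy> &Vec,
                              std::set<std::string> &Elem) {
  std::set<std::string> EltsOfVec;
  for (const VecTy &V : Vec)
    EltsOfVec.insert(V.first);

  bool Changed = false;
  for (auto I = Vec.begin(); I != Vec.end(); )
    if (!Elem.count(I->first)) { I = Vec.erase(I); Changed = true; } else ++I;
  for (auto I = Elem.begin(); I != Elem.end(); )
    if (!EltsOfVec.count(*I)) { I = Elem.erase(I); Changed = true; } else ++I;
  return Changed;
}

int main() {
  std::set<VecTy> Vec = {{"i32", 4}, {"i16", 8}, {"f32", 4}};
  std::set<std::string> Elem = {"i32", "f32", "f64"};
  intersectEltTypes(Vec, Elem);
  assert(Vec.size() == 2);    // v8i16 dropped: i16 is not in Elem
  assert(Elem.size() == 2);   // f64 dropped: no vector has f64 elements
  return 0;
}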
+bool TypeInfer::EnforceVectorSubVectorTypeIs(TypeSetByHwMode &Vec, + TypeSetByHwMode &Sub) { + ValidateOnExit _1(Vec), _2(Sub); if (TP.hasError()) return false; - // Okay, find the smallest type from current set and remove anything the - // same or smaller from the other set. We need to ensure that the scalar - // type size is smaller than the scalar size of the smallest type. For - // vectors, we also need to make sure that the total size is no larger than - // the size of the smallest type. - { - TypeSet InputSet(Other); - MVT Smallest = *std::min_element(TypeVec.begin(), TypeVec.end(), - [](MVT A, MVT B) { - return A.getScalarSizeInBits() < B.getScalarSizeInBits() || - (A.getScalarSizeInBits() == B.getScalarSizeInBits() && - A.getSizeInBits() < B.getSizeInBits()); - }); - - auto I = remove_if(Other.TypeVec, [Smallest](MVT OtherVT) { - // Don't compare vector and non-vector types. - if (OtherVT.isVector() != Smallest.isVector()) - return false; - // The getSizeInBits() check here is only needed for vectors, but is - // a subset of the scalar check for scalars so no need to qualify. - return OtherVT.getScalarSizeInBits() <= Smallest.getScalarSizeInBits() || - OtherVT.getSizeInBits() < Smallest.getSizeInBits(); - }); - MadeChange |= I != Other.TypeVec.end(); // If we're about to remove types. - Other.TypeVec.erase(I, Other.TypeVec.end()); - - if (Other.TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + InputSet.getName() + - "' has nothing larger than '" + getName() +"'!"); + /// Return true if B is a suB-vector of P, i.e. P is a suPer-vector of B. + auto IsSubVec = [](MVT B, MVT P) -> bool { + if (!B.isVector() || !P.isVector()) return false; - } - } + // Logically a <4 x i32> is a valid subvector of <n x 4 x i32> + // but until there are obvious use-cases for this, keep the + // types separate. + if (B.isScalableVector() != P.isScalableVector()) + return false; + if (B.getVectorElementType() != P.getVectorElementType()) + return false; + return B.getVectorNumElements() < P.getVectorNumElements(); + }; + + /// Return true if S has no element (vector type) that T is a sub-vector of, + /// i.e. has the same element type as T and more elements. + auto NoSubV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool { + for (const auto &I : S) + if (IsSubVec(T, I)) + return false; + return true; + }; - // Okay, find the largest type from the other set and remove anything the - // same or smaller from the current set. We need to ensure that the scalar - // type size is larger than the scalar size of the largest type. For - // vectors, we also need to make sure that the total size is no smaller than - // the size of the largest type. - { - TypeSet InputSet(*this); - MVT Largest = *std::max_element(Other.TypeVec.begin(), Other.TypeVec.end(), - [](MVT A, MVT B) { - return A.getScalarSizeInBits() < B.getScalarSizeInBits() || - (A.getScalarSizeInBits() == B.getScalarSizeInBits() && - A.getSizeInBits() < B.getSizeInBits()); - }); - auto I = remove_if(TypeVec, [Largest](MVT OtherVT) { - // Don't compare vector and non-vector types. - if (OtherVT.isVector() != Largest.isVector()) + /// Return true if S has no element (vector type) that T is a super-vector + /// of, i.e. has the same element type as T and fewer elements. 
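The IsSubVec lambda above defines the relation the sub-vector constraint is built on: B is a sub-vector of P when both are vectors with the same element type (and the same scalability) and B has strictly fewer elements. A standalone sketch of that relation in the same (element name, element count) toy model; EnforceVectorSubVectorTypeIs then erases from Sub every type with no super-vector in Vec, and from Vec every type with no sub-vector in Sub.

#include <cassert>
#include <string>
#include <utility>

using VecTy = std::pair<std::string, unsigned>;

// B is a sub-vector of P iff they share an element type and B has strictly
// fewer elements.  (The real code also requires matching scalability.)
static bool isSubVec(const VecTy &B, const VecTy &P) {
  return B.first == P.first && B.second < P.second;
}

int main() {
  VecTy V4I32 = {"i32", 4}, V8I32 = {"i32", 8}, V8I16 = {"i16", 8};
  assert(isSubVec(V4I32, V8I32));    // v4i32 is a sub-vector of v8i32
  assert(!isSubVec(V8I32, V4I32));   // not the other way around
  assert(!isSubVec(V4I32, V8I16));   // element types differ
  return 0;
}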
+ auto NoSupV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool { + for (const auto &I : S) + if (IsSubVec(I, T)) return false; - return OtherVT.getScalarSizeInBits() >= Largest.getScalarSizeInBits() || - OtherVT.getSizeInBits() > Largest.getSizeInBits(); - }); - MadeChange |= I != TypeVec.end(); // If we're about to remove types. - TypeVec.erase(I, TypeVec.end()); - - if (TypeVec.empty()) { - TP.error("Type inference contradiction found, '" + InputSet.getName() + - "' has nothing smaller than '" + Other.getName() +"'!"); - return false; - } - } + return true; + }; - return MadeChange; -} + bool Changed = false; -/// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type -/// whose element is specified by VTOperand. -bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT, - TreePattern &TP) { - bool MadeChange = false; + if (Vec.empty()) + Changed |= EnforceVector(Vec); + if (Sub.empty()) + Changed |= EnforceVector(Sub); - MadeChange |= EnforceVector(TP); + for (unsigned M : union_modes(Vec, Sub)) { + TypeSetByHwMode::SetType &S = Sub.get(M); + TypeSetByHwMode::SetType &V = Vec.get(M); - TypeSet InputSet(*this); + Changed |= berase_if(S, isScalar); - // Filter out all the types which don't have the right element type. - auto I = remove_if(TypeVec, [VT](MVT VVT) { - return VVT.getVectorElementType().SimpleTy != VT; - }); - MadeChange |= I != TypeVec.end(); - TypeVec.erase(I, TypeVec.end()); + // Erase all types from S that are not sub-vectors of a type in V. + Changed |= berase_if(S, std::bind(NoSubV, V, std::placeholders::_1)); - if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have a vector element of type " + - getEnumName(VT)); - return false; + // Erase all types from V that are not super-vectors of a type in S. + Changed |= berase_if(V, std::bind(NoSupV, S, std::placeholders::_1)); } - return MadeChange; + return Changed; } -/// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type -/// whose element is specified by VTOperand. -bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand, - TreePattern &TP) { +/// 1. Ensure that V has a scalar type iff W has a scalar type. +/// 2. Ensure that for each vector type T in V, there exists a vector +/// type U in W, such that T and U have the same number of elements. +/// 3. Ensure that for each vector type U in W, there exists a vector +/// type T in V, such that T and U have the same number of elements +/// (reverse of 2). +bool TypeInfer::EnforceSameNumElts(TypeSetByHwMode &V, TypeSetByHwMode &W) { + ValidateOnExit _1(V), _2(W); if (TP.hasError()) return false; - // "This" must be a vector and "VTOperand" must be a scalar. - bool MadeChange = false; - MadeChange |= EnforceVector(TP); - MadeChange |= VTOperand.EnforceScalar(TP); - - // If we know the vector type, it forces the scalar to agree. - if (isConcrete()) { - MVT IVT = getConcrete(); - IVT = IVT.getVectorElementType(); - return MadeChange || VTOperand.MergeInTypeInfo(IVT.SimpleTy, TP); - } - - // If the scalar type is known, filter out vector types whose element types - // disagree. 
- if (!VTOperand.isConcrete()) - return MadeChange; - - MVT::SimpleValueType VT = VTOperand.getConcrete(); - - MadeChange |= EnforceVectorEltTypeIs(VT, TP); - - return MadeChange; + bool Changed = false; + if (V.empty()) + Changed |= EnforceAny(V); + if (W.empty()) + Changed |= EnforceAny(W); + + // An actual vector type cannot have 0 elements, so we can treat scalars + // as zero-length vectors. This way both vectors and scalars can be + // processed identically. + auto NoLength = [](const SmallSet<unsigned,2> &Lengths, MVT T) -> bool { + return !Lengths.count(T.isVector() ? T.getVectorNumElements() : 0); + }; + + for (unsigned M : union_modes(V, W)) { + TypeSetByHwMode::SetType &VS = V.get(M); + TypeSetByHwMode::SetType &WS = W.get(M); + + SmallSet<unsigned,2> VN, WN; + for (MVT T : VS) + VN.insert(T.isVector() ? T.getVectorNumElements() : 0); + for (MVT T : WS) + WN.insert(T.isVector() ? T.getVectorNumElements() : 0); + + Changed |= berase_if(VS, std::bind(NoLength, WN, std::placeholders::_1)); + Changed |= berase_if(WS, std::bind(NoLength, VN, std::placeholders::_1)); + } + return Changed; } -/// EnforceVectorSubVectorTypeIs - 'this' is now constrained to be a -/// vector type specified by VTOperand. -bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand, - TreePattern &TP) { +/// 1. Ensure that for each type T in A, there exists a type U in B, +/// such that T and U have equal size in bits. +/// 2. Ensure that for each type U in B, there exists a type T in A +/// such that T and U have equal size in bits (reverse of 1). +bool TypeInfer::EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B) { + ValidateOnExit _1(A), _2(B); if (TP.hasError()) return false; + bool Changed = false; + if (A.empty()) + Changed |= EnforceAny(A); + if (B.empty()) + Changed |= EnforceAny(B); - // "This" must be a vector and "VTOperand" must be a vector. - bool MadeChange = false; - MadeChange |= EnforceVector(TP); - MadeChange |= VTOperand.EnforceVector(TP); - - // If one side is known to be integer or known to be FP but the other side has - // no information, get at least the type integrality info in there. - if (!hasFloatingPointTypes()) - MadeChange |= VTOperand.EnforceInteger(TP); - else if (!hasIntegerTypes()) - MadeChange |= VTOperand.EnforceFloatingPoint(TP); - if (!VTOperand.hasFloatingPointTypes()) - MadeChange |= EnforceInteger(TP); - else if (!VTOperand.hasIntegerTypes()) - MadeChange |= EnforceFloatingPoint(TP); - - assert(!isCompletelyUnknown() && !VTOperand.isCompletelyUnknown() && - "Should have a type list now"); - - // If we know the vector type, it forces the scalar types to agree. - // Also force one vector to have more elements than the other. - if (isConcrete()) { - MVT IVT = getConcrete(); - unsigned NumElems = IVT.getVectorNumElements(); - IVT = IVT.getVectorElementType(); - - EEVT::TypeSet EltTypeSet(IVT.SimpleTy, TP); - MadeChange |= VTOperand.EnforceVectorEltTypeIs(EltTypeSet, TP); - - // Only keep types that have less elements than VTOperand. - TypeSet InputSet(VTOperand); - - auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() >= NumElems; - }); - MadeChange |= I != VTOperand.TypeVec.end(); - VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end()); - - if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here! 
- TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have less vector elements than '" + - getName() + "'"); - return false; - } - } else if (VTOperand.isConcrete()) { - MVT IVT = VTOperand.getConcrete(); - unsigned NumElems = IVT.getVectorNumElements(); - IVT = IVT.getVectorElementType(); - - EEVT::TypeSet EltTypeSet(IVT.SimpleTy, TP); - MadeChange |= EnforceVectorEltTypeIs(EltTypeSet, TP); + auto NoSize = [](const SmallSet<unsigned,2> &Sizes, MVT T) -> bool { + return !Sizes.count(T.getSizeInBits()); + }; - // Only keep types that have more elements than 'this'. - TypeSet InputSet(*this); + for (unsigned M : union_modes(A, B)) { + TypeSetByHwMode::SetType &AS = A.get(M); + TypeSetByHwMode::SetType &BS = B.get(M); + SmallSet<unsigned,2> AN, BN; - auto I = remove_if(TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() <= NumElems; - }); - MadeChange |= I != TypeVec.end(); - TypeVec.erase(I, TypeVec.end()); + for (MVT T : AS) + AN.insert(T.getSizeInBits()); + for (MVT T : BS) + BN.insert(T.getSizeInBits()); - if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have more vector elements than '" + - VTOperand.getName() + "'"); - return false; - } + Changed |= berase_if(AS, std::bind(NoSize, BN, std::placeholders::_1)); + Changed |= berase_if(BS, std::bind(NoSize, AN, std::placeholders::_1)); } - return MadeChange; + return Changed; } -/// EnforceameNumElts - If VTOperand is a scalar, then 'this' is a scalar. If -/// VTOperand is a vector, then 'this' must have the same number of elements. -bool EEVT::TypeSet::EnforceSameNumElts(EEVT::TypeSet &VTOperand, - TreePattern &TP) { - if (TP.hasError()) - return false; - - bool MadeChange = false; +void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) { + ValidateOnExit _1(VTS); + TypeSetByHwMode Legal = getLegalTypes(); + bool HaveLegalDef = Legal.hasDefault(); - if (isCompletelyUnknown()) - MadeChange = FillWithPossibleTypes(TP); - - if (VTOperand.isCompletelyUnknown()) - MadeChange = VTOperand.FillWithPossibleTypes(TP); - - // If one contains vectors but the other doesn't pull vectors out. - if (!hasVectorTypes()) - MadeChange |= VTOperand.EnforceScalar(TP); - else if (!hasScalarTypes()) - MadeChange |= VTOperand.EnforceVector(TP); - if (!VTOperand.hasVectorTypes()) - MadeChange |= EnforceScalar(TP); - else if (!VTOperand.hasScalarTypes()) - MadeChange |= EnforceVector(TP); - - // If one type is a vector, make sure the other has the same element count. - // If this a scalar, then we are already done with the above. - if (isConcrete()) { - MVT IVT = getConcrete(); - if (IVT.isVector()) { - unsigned NumElems = IVT.getVectorNumElements(); - - // Only keep types that have same elements as 'this'. - TypeSet InputSet(VTOperand); - - auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() != NumElems; - }); - MadeChange |= I != VTOperand.TypeVec.end(); - VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end()); - - if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here! 
- TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same number elements as '" + - getName() + "'"); - return false; - } + for (auto &I : VTS) { + unsigned M = I.first; + if (!Legal.hasMode(M) && !HaveLegalDef) { + TP.error("Invalid mode " + Twine(M)); + return; } - } else if (VTOperand.isConcrete()) { - MVT IVT = VTOperand.getConcrete(); - if (IVT.isVector()) { - unsigned NumElems = IVT.getVectorNumElements(); - - // Only keep types that have same elements as VTOperand. - TypeSet InputSet(*this); + expandOverloads(I.second, Legal.get(M)); + } +} - auto I = remove_if(TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() != NumElems; - }); - MadeChange |= I != TypeVec.end(); - TypeVec.erase(I, TypeVec.end()); +void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, + const TypeSetByHwMode::SetType &Legal) { + std::set<MVT> Ovs; + for (MVT T : Out) { + if (!T.isOverloaded()) + continue; - if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same number elements than '" + - VTOperand.getName() + "'"); - return false; - } + Ovs.insert(T); + // MachineValueTypeSet allows iteration and erasing. + Out.erase(T); + } + + for (MVT Ov : Ovs) { + switch (Ov.SimpleTy) { + case MVT::iPTRAny: + Out.insert(MVT::iPTR); + return; + case MVT::iAny: + for (MVT T : MVT::integer_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::integer_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + return; + case MVT::fAny: + for (MVT T : MVT::fp_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + for (MVT T : MVT::fp_vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + return; + case MVT::vAny: + for (MVT T : MVT::vector_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + return; + case MVT::Any: + for (MVT T : MVT::all_valuetypes()) + if (Legal.count(T)) + Out.insert(T); + return; + default: + break; } } +} - return MadeChange; +TypeSetByHwMode TypeInfer::getLegalTypes() { + if (!LegalTypesCached) { + // Stuff all types from all modes into the default mode. + const TypeSetByHwMode <S = TP.getDAGPatterns().getLegalTypes(); + for (const auto &I : LTS) + LegalCache.insert(I.second); + LegalTypesCached = true; + } + TypeSetByHwMode VTS; + VTS.getOrCreate(DefaultMode) = LegalCache; + return VTS; } -/// EnforceSameSize - 'this' is now constrained to be same size as VTOperand. -bool EEVT::TypeSet::EnforceSameSize(EEVT::TypeSet &VTOperand, - TreePattern &TP) { - if (TP.hasError()) - return false; +//===----------------------------------------------------------------------===// +// TreePredicateFn Implementation +//===----------------------------------------------------------------------===// - bool MadeChange = false; +/// TreePredicateFn constructor. Here 'N' is a subclass of PatFrag. 
+TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) { + assert( + (!hasPredCode() || !hasImmCode()) && + ".td file corrupt: can't have a node predicate *and* an imm predicate"); +} - if (isCompletelyUnknown()) - MadeChange = FillWithPossibleTypes(TP); +bool TreePredicateFn::hasPredCode() const { + return isLoad() || isStore() || isAtomic() || + !PatFragRec->getRecord()->getValueAsString("PredicateCode").empty(); +} - if (VTOperand.isCompletelyUnknown()) - MadeChange = VTOperand.FillWithPossibleTypes(TP); +std::string TreePredicateFn::getPredCode() const { + std::string Code = ""; - // If we know one of the types, it forces the other type agree. - if (isConcrete()) { - MVT IVT = getConcrete(); - unsigned Size = IVT.getSizeInBits(); + if (!isLoad() && !isStore() && !isAtomic()) { + Record *MemoryVT = getMemoryVT(); - // Only keep types that have the same size as 'this'. - TypeSet InputSet(VTOperand); + if (MemoryVT) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "MemoryVT requires IsLoad or IsStore"); + } - auto I = remove_if(VTOperand.TypeVec, - [&](MVT VT) { return VT.getSizeInBits() != Size; }); - MadeChange |= I != VTOperand.TypeVec.end(); - VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end()); + if (!isLoad() && !isStore()) { + if (isUnindexed()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsUnindexed requires IsLoad or IsStore"); - if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same size as '" + - getName() + "'"); - return false; - } - } else if (VTOperand.isConcrete()) { - MVT IVT = VTOperand.getConcrete(); - unsigned Size = IVT.getSizeInBits(); + Record *ScalarMemoryVT = getScalarMemoryVT(); - // Only keep types that have the same size as VTOperand. - TypeSet InputSet(*this); + if (ScalarMemoryVT) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "ScalarMemoryVT requires IsLoad or IsStore"); + } - auto I = - remove_if(TypeVec, [&](MVT VT) { return VT.getSizeInBits() != Size; }); - MadeChange |= I != TypeVec.end(); - TypeVec.erase(I, TypeVec.end()); + if (isLoad() + isStore() + isAtomic() > 1) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsLoad, IsStore, and IsAtomic are mutually exclusive"); - if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! 
- TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same size as '" + - VTOperand.getName() + "'"); - return false; + if (isLoad()) { + if (!isUnindexed() && !isNonExtLoad() && !isAnyExtLoad() && + !isSignExtLoad() && !isZeroExtLoad() && getMemoryVT() == nullptr && + getScalarMemoryVT() == nullptr) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsLoad cannot be used by itself"); + } else { + if (isNonExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonExtLoad requires IsLoad"); + if (isAnyExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAnyExtLoad requires IsLoad"); + if (isSignExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsSignExtLoad requires IsLoad"); + if (isZeroExtLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsZeroExtLoad requires IsLoad"); + } + + if (isStore()) { + if (!isUnindexed() && !isTruncStore() && !isNonTruncStore() && + getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsStore cannot be used by itself"); + } else { + if (isNonTruncStore()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonTruncStore requires IsStore"); + if (isTruncStore()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsTruncStore requires IsStore"); + } + + if (isAtomic()) { + if (getMemoryVT() == nullptr && !isAtomicOrderingMonotonic() && + !isAtomicOrderingAcquire() && !isAtomicOrderingRelease() && + !isAtomicOrderingAcquireRelease() && + !isAtomicOrderingSequentiallyConsistent() && + !isAtomicOrderingAcquireOrStronger() && + !isAtomicOrderingReleaseOrStronger() && + !isAtomicOrderingWeakerThanAcquire() && + !isAtomicOrderingWeakerThanRelease()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomic cannot be used by itself"); + } else { + if (isAtomicOrderingMonotonic()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingMonotonic requires IsAtomic"); + if (isAtomicOrderingAcquire()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingAcquire requires IsAtomic"); + if (isAtomicOrderingRelease()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingRelease requires IsAtomic"); + if (isAtomicOrderingAcquireRelease()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingAcquireRelease requires IsAtomic"); + if (isAtomicOrderingSequentiallyConsistent()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingSequentiallyConsistent requires IsAtomic"); + if (isAtomicOrderingAcquireOrStronger()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingAcquireOrStronger requires IsAtomic"); + if (isAtomicOrderingReleaseOrStronger()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingReleaseOrStronger requires IsAtomic"); + if (isAtomicOrderingWeakerThanAcquire()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAtomicOrderingWeakerThanAcquire requires IsAtomic"); + } + + if (isLoad() || isStore() || isAtomic()) { + StringRef SDNodeName = + isLoad() ? "LoadSDNode" : isStore() ? 
"StoreSDNode" : "AtomicSDNode"; + + Record *MemoryVT = getMemoryVT(); + + if (MemoryVT) + Code += ("if (cast<" + SDNodeName + ">(N)->getMemoryVT() != MVT::" + + MemoryVT->getName() + ") return false;\n") + .str(); + } + + if (isAtomic() && isAtomicOrderingMonotonic()) + Code += "if (cast<AtomicSDNode>(N)->getOrdering() != " + "AtomicOrdering::Monotonic) return false;\n"; + if (isAtomic() && isAtomicOrderingAcquire()) + Code += "if (cast<AtomicSDNode>(N)->getOrdering() != " + "AtomicOrdering::Acquire) return false;\n"; + if (isAtomic() && isAtomicOrderingRelease()) + Code += "if (cast<AtomicSDNode>(N)->getOrdering() != " + "AtomicOrdering::Release) return false;\n"; + if (isAtomic() && isAtomicOrderingAcquireRelease()) + Code += "if (cast<AtomicSDNode>(N)->getOrdering() != " + "AtomicOrdering::AcquireRelease) return false;\n"; + if (isAtomic() && isAtomicOrderingSequentiallyConsistent()) + Code += "if (cast<AtomicSDNode>(N)->getOrdering() != " + "AtomicOrdering::SequentiallyConsistent) return false;\n"; + + if (isAtomic() && isAtomicOrderingAcquireOrStronger()) + Code += "if (!isAcquireOrStronger(cast<AtomicSDNode>(N)->getOrdering())) " + "return false;\n"; + if (isAtomic() && isAtomicOrderingWeakerThanAcquire()) + Code += "if (isAcquireOrStronger(cast<AtomicSDNode>(N)->getOrdering())) " + "return false;\n"; + + if (isAtomic() && isAtomicOrderingReleaseOrStronger()) + Code += "if (!isReleaseOrStronger(cast<AtomicSDNode>(N)->getOrdering())) " + "return false;\n"; + if (isAtomic() && isAtomicOrderingWeakerThanRelease()) + Code += "if (isReleaseOrStronger(cast<AtomicSDNode>(N)->getOrdering())) " + "return false;\n"; + + if (isLoad() || isStore()) { + StringRef SDNodeName = isLoad() ? "LoadSDNode" : "StoreSDNode"; + + if (isUnindexed()) + Code += ("if (cast<" + SDNodeName + + ">(N)->getAddressingMode() != ISD::UNINDEXED) " + "return false;\n") + .str(); + + if (isLoad()) { + if ((isNonExtLoad() + isAnyExtLoad() + isSignExtLoad() + + isZeroExtLoad()) > 1) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonExtLoad, IsAnyExtLoad, IsSignExtLoad, and " + "IsZeroExtLoad are mutually exclusive"); + if (isNonExtLoad()) + Code += "if (cast<LoadSDNode>(N)->getExtensionType() != " + "ISD::NON_EXTLOAD) return false;\n"; + if (isAnyExtLoad()) + Code += "if (cast<LoadSDNode>(N)->getExtensionType() != ISD::EXTLOAD) " + "return false;\n"; + if (isSignExtLoad()) + Code += "if (cast<LoadSDNode>(N)->getExtensionType() != ISD::SEXTLOAD) " + "return false;\n"; + if (isZeroExtLoad()) + Code += "if (cast<LoadSDNode>(N)->getExtensionType() != ISD::ZEXTLOAD) " + "return false;\n"; + } else { + if ((isNonTruncStore() + isTruncStore()) > 1) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonTruncStore, and IsTruncStore are mutually exclusive"); + if (isNonTruncStore()) + Code += + " if (cast<StoreSDNode>(N)->isTruncatingStore()) return false;\n"; + if (isTruncStore()) + Code += + " if (!cast<StoreSDNode>(N)->isTruncatingStore()) return false;\n"; } + + Record *ScalarMemoryVT = getScalarMemoryVT(); + + if (ScalarMemoryVT) + Code += ("if (cast<" + SDNodeName + + ">(N)->getMemoryVT().getScalarType() != MVT::" + + ScalarMemoryVT->getName() + ") return false;\n") + .str(); } - return MadeChange; -} + std::string PredicateCode = PatFragRec->getRecord()->getValueAsString("PredicateCode"); -//===----------------------------------------------------------------------===// -// Helpers for working with extended types. 
+ Code += PredicateCode; -/// Dependent variable map for CodeGenDAGPattern variant generation -typedef std::map<std::string, int> DepVarMap; + if (PredicateCode.empty() && !Code.empty()) + Code += "return true;\n"; -static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) { - if (N->isLeaf()) { - if (isa<DefInit>(N->getLeafValue())) - DepMap[N->getName()]++; - } else { - for (size_t i = 0, e = N->getNumChildren(); i != e; ++i) - FindDepVarsOf(N->getChild(i), DepMap); - } -} - -/// Find dependent variables within child patterns -static void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) { - DepVarMap depcounts; - FindDepVarsOf(N, depcounts); - for (const std::pair<std::string, int> &Pair : depcounts) { - if (Pair.second > 1) - DepVars.insert(Pair.first); - } + return Code; } -#ifndef NDEBUG -/// Dump the dependent variable set: -static void DumpDepVars(MultipleUseVarSet &DepVars) { - if (DepVars.empty()) { - DEBUG(errs() << "<empty set>"); - } else { - DEBUG(errs() << "[ "); - for (const std::string &DepVar : DepVars) { - DEBUG(errs() << DepVar << " "); - } - DEBUG(errs() << "]"); - } +bool TreePredicateFn::hasImmCode() const { + return !PatFragRec->getRecord()->getValueAsString("ImmediateCode").empty(); } -#endif +std::string TreePredicateFn::getImmCode() const { + return PatFragRec->getRecord()->getValueAsString("ImmediateCode"); +} -//===----------------------------------------------------------------------===// -// TreePredicateFn Implementation -//===----------------------------------------------------------------------===// +bool TreePredicateFn::immCodeUsesAPInt() const { + return getOrigPatFragRecord()->getRecord()->getValueAsBit("IsAPInt"); +} -/// TreePredicateFn constructor. Here 'N' is a subclass of PatFrag. -TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) { - assert((getPredCode().empty() || getImmCode().empty()) && - ".td file corrupt: can't have a node predicate *and* an imm predicate"); +bool TreePredicateFn::immCodeUsesAPFloat() const { + bool Unset; + // The return value will be false when IsAPFloat is unset. 
+ return getOrigPatFragRecord()->getRecord()->getValueAsBitOrUnset("IsAPFloat", + Unset); } -std::string TreePredicateFn::getPredCode() const { - return PatFragRec->getRecord()->getValueAsString("PredicateCode"); +bool TreePredicateFn::isPredefinedPredicateEqualTo(StringRef Field, + bool Value) const { + bool Unset; + bool Result = + getOrigPatFragRecord()->getRecord()->getValueAsBitOrUnset(Field, Unset); + if (Unset) + return false; + return Result == Value; +} +bool TreePredicateFn::isLoad() const { + return isPredefinedPredicateEqualTo("IsLoad", true); +} +bool TreePredicateFn::isStore() const { + return isPredefinedPredicateEqualTo("IsStore", true); +} +bool TreePredicateFn::isAtomic() const { + return isPredefinedPredicateEqualTo("IsAtomic", true); +} +bool TreePredicateFn::isUnindexed() const { + return isPredefinedPredicateEqualTo("IsUnindexed", true); +} +bool TreePredicateFn::isNonExtLoad() const { + return isPredefinedPredicateEqualTo("IsNonExtLoad", true); +} +bool TreePredicateFn::isAnyExtLoad() const { + return isPredefinedPredicateEqualTo("IsAnyExtLoad", true); +} +bool TreePredicateFn::isSignExtLoad() const { + return isPredefinedPredicateEqualTo("IsSignExtLoad", true); +} +bool TreePredicateFn::isZeroExtLoad() const { + return isPredefinedPredicateEqualTo("IsZeroExtLoad", true); +} +bool TreePredicateFn::isNonTruncStore() const { + return isPredefinedPredicateEqualTo("IsTruncStore", false); +} +bool TreePredicateFn::isTruncStore() const { + return isPredefinedPredicateEqualTo("IsTruncStore", true); +} +bool TreePredicateFn::isAtomicOrderingMonotonic() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingMonotonic", true); +} +bool TreePredicateFn::isAtomicOrderingAcquire() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingAcquire", true); +} +bool TreePredicateFn::isAtomicOrderingRelease() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingRelease", true); +} +bool TreePredicateFn::isAtomicOrderingAcquireRelease() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingAcquireRelease", true); +} +bool TreePredicateFn::isAtomicOrderingSequentiallyConsistent() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingSequentiallyConsistent", + true); +} +bool TreePredicateFn::isAtomicOrderingAcquireOrStronger() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingAcquireOrStronger", true); +} +bool TreePredicateFn::isAtomicOrderingWeakerThanAcquire() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingAcquireOrStronger", false); +} +bool TreePredicateFn::isAtomicOrderingReleaseOrStronger() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingReleaseOrStronger", true); +} +bool TreePredicateFn::isAtomicOrderingWeakerThanRelease() const { + return isPredefinedPredicateEqualTo("IsAtomicOrderingReleaseOrStronger", false); +} +Record *TreePredicateFn::getMemoryVT() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("MemoryVT")) + return nullptr; + return R->getValueAsDef("MemoryVT"); +} +Record *TreePredicateFn::getScalarMemoryVT() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("ScalarMemoryVT")) + return nullptr; + return R->getValueAsDef("ScalarMemoryVT"); } -std::string TreePredicateFn::getImmCode() const { - return PatFragRec->getRecord()->getValueAsString("ImmediateCode"); +StringRef TreePredicateFn::getImmType() const { + if (immCodeUsesAPInt()) + return "const APInt &"; + if (immCodeUsesAPFloat()) + return "const APFloat &"; + return 
"int64_t"; } +StringRef TreePredicateFn::getImmTypeIdentifier() const { + if (immCodeUsesAPInt()) + return "APInt"; + else if (immCodeUsesAPFloat()) + return "APFloat"; + return "I64"; +} /// isAlwaysTrue - Return true if this is a noop predicate. bool TreePredicateFn::isAlwaysTrue() const { - return getPredCode().empty() && getImmCode().empty(); + return !hasPredCode() && !hasImmCode(); } /// Return the name to use in the generated code to reference this, this is @@ -790,14 +1157,61 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { // Handle immediate predicates first. std::string ImmCode = getImmCode(); if (!ImmCode.empty()) { - std::string Result = - " int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue();\n"; + if (isLoad()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsLoad cannot be used with ImmLeaf or its subclasses"); + if (isStore()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "IsStore cannot be used with ImmLeaf or its subclasses"); + if (isUnindexed()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsUnindexed cannot be used with ImmLeaf or its subclasses"); + if (isNonExtLoad()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonExtLoad cannot be used with ImmLeaf or its subclasses"); + if (isAnyExtLoad()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsAnyExtLoad cannot be used with ImmLeaf or its subclasses"); + if (isSignExtLoad()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsSignExtLoad cannot be used with ImmLeaf or its subclasses"); + if (isZeroExtLoad()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsZeroExtLoad cannot be used with ImmLeaf or its subclasses"); + if (isNonTruncStore()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsNonTruncStore cannot be used with ImmLeaf or its subclasses"); + if (isTruncStore()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "IsTruncStore cannot be used with ImmLeaf or its subclasses"); + if (getMemoryVT()) + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "MemoryVT cannot be used with ImmLeaf or its subclasses"); + if (getScalarMemoryVT()) + PrintFatalError( + getOrigPatFragRecord()->getRecord()->getLoc(), + "ScalarMemoryVT cannot be used with ImmLeaf or its subclasses"); + + std::string Result = (" " + getImmType() + " Imm = ").str(); + if (immCodeUsesAPFloat()) + Result += "cast<ConstantFPSDNode>(Node)->getValueAPF();\n"; + else if (immCodeUsesAPInt()) + Result += "cast<ConstantSDNode>(Node)->getAPIntValue();\n"; + else + Result += "cast<ConstantSDNode>(Node)->getSExtValue();\n"; return Result + ImmCode; } - + // Handle arbitrary node predicates. - assert(!getPredCode().empty() && "Don't have any predicate code!"); - std::string ClassName; + assert(hasPredCode() && "Don't have any predicate code!"); + StringRef ClassName; if (PatFragRec->getOnlyTree()->isLeaf()) ClassName = "SDNode"; else { @@ -808,8 +1222,8 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { if (ClassName == "SDNode") Result = " SDNode *N = Node;\n"; else - Result = " auto *N = cast<" + ClassName + ">(Node);\n"; - + Result = " auto *N = cast<" + ClassName.str() + ">(Node);\n"; + return Result + getPredCode(); } @@ -817,7 +1231,6 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { // PatternToMatch implementation // - /// getPatternSize - Return the 'size' of this pattern. 
We want to match large /// patterns before small ones. This is used to determine the size of a /// pattern. @@ -829,10 +1242,8 @@ static unsigned getPatternSize(const TreePatternNode *P, if (P->isLeaf() && isa<IntInit>(P->getLeafValue())) Size += 2; - const ComplexPattern *AM = P->getComplexPatternInfo(CGP); - if (AM) { + if (const ComplexPattern *AM = P->getComplexPatternInfo(CGP)) { Size += AM->getComplexity(); - // We don't want to count any children twice, so return early. return Size; } @@ -844,11 +1255,17 @@ static unsigned getPatternSize(const TreePatternNode *P, // Count children in the count if they are also nodes. for (unsigned i = 0, e = P->getNumChildren(); i != e; ++i) { - TreePatternNode *Child = P->getChild(i); - if (!Child->isLeaf() && Child->getNumTypes() && - Child->getType(0) != MVT::Other) - Size += getPatternSize(Child, CGP); - else if (Child->isLeaf()) { + const TreePatternNode *Child = P->getChild(i); + if (!Child->isLeaf() && Child->getNumTypes()) { + const TypeSetByHwMode &T0 = Child->getType(0); + // At this point, all variable type sets should be simple, i.e. only + // have a default mode. + if (T0.getMachineValueType() != MVT::Other) { + Size += getPatternSize(Child, CGP); + continue; + } + } + if (Child->isLeaf()) { if (isa<IntInit>(Child->getLeafValue())) Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2). else if (Child->getComplexPatternInfo(CGP)) @@ -868,52 +1285,37 @@ getPatternComplexity(const CodeGenDAGPatterns &CGP) const { return getPatternSize(getSrcPattern(), CGP) + getAddedComplexity(); } - /// getPredicateCheck - Return a single string containing all of this /// pattern's predicates concatenated with "&&" operators. /// std::string PatternToMatch::getPredicateCheck() const { - SmallVector<Record *, 4> PredicateRecs; - for (Init *I : Predicates->getValues()) { - if (DefInit *Pred = dyn_cast<DefInit>(I)) { - Record *Def = Pred->getDef(); - if (!Def->isSubClassOf("Predicate")) { -#ifndef NDEBUG - Def->dump(); -#endif - llvm_unreachable("Unknown predicate type!"); - } - PredicateRecs.push_back(Def); - } - } - // Sort so that different orders get canonicalized to the same string. 
- std::sort(PredicateRecs.begin(), PredicateRecs.end(), LessRecord()); - - SmallString<128> PredicateCheck; - for (Record *Pred : PredicateRecs) { - if (!PredicateCheck.empty()) - PredicateCheck += " && "; - PredicateCheck += "("; - PredicateCheck += Pred->getValueAsString("CondString"); - PredicateCheck += ")"; - } - - return PredicateCheck.str(); + SmallVector<const Predicate*,4> PredList; + for (const Predicate &P : Predicates) + PredList.push_back(&P); + std::sort(PredList.begin(), PredList.end(), deref<llvm::less>()); + + std::string Check; + for (unsigned i = 0, e = PredList.size(); i != e; ++i) { + if (i != 0) + Check += " && "; + Check += '(' + PredList[i]->getCondString() + ')'; + } + return Check; } //===----------------------------------------------------------------------===// // SDTypeConstraint implementation // -SDTypeConstraint::SDTypeConstraint(Record *R) { +SDTypeConstraint::SDTypeConstraint(Record *R, const CodeGenHwModes &CGH) { OperandNo = R->getValueAsInt("OperandNum"); if (R->isSubClassOf("SDTCisVT")) { ConstraintType = SDTCisVT; - x.SDTCisVT_Info.VT = getValueType(R->getValueAsDef("VT")); - if (x.SDTCisVT_Info.VT == MVT::isVoid) - PrintFatalError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT"); - + VVT = getValueTypeByHwMode(R->getValueAsDef("VT"), CGH); + for (const auto &P : VVT) + if (P.second == MVT::isVoid) + PrintFatalError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT"); } else if (R->isSubClassOf("SDTCisPtrTy")) { ConstraintType = SDTCisPtrTy; } else if (R->isSubClassOf("SDTCisInt")) { @@ -942,13 +1344,16 @@ SDTypeConstraint::SDTypeConstraint(Record *R) { R->getValueAsInt("OtherOpNum"); } else if (R->isSubClassOf("SDTCVecEltisVT")) { ConstraintType = SDTCVecEltisVT; - x.SDTCVecEltisVT_Info.VT = getValueType(R->getValueAsDef("VT")); - if (MVT(x.SDTCVecEltisVT_Info.VT).isVector()) - PrintFatalError(R->getLoc(), "Cannot use vector type as SDTCVecEltisVT"); - if (!MVT(x.SDTCVecEltisVT_Info.VT).isInteger() && - !MVT(x.SDTCVecEltisVT_Info.VT).isFloatingPoint()) - PrintFatalError(R->getLoc(), "Must use integer or floating point type " - "as SDTCVecEltisVT"); + VVT = getValueTypeByHwMode(R->getValueAsDef("VT"), CGH); + for (const auto &P : VVT) { + MVT T = P.second; + if (T.isVector()) + PrintFatalError(R->getLoc(), + "Cannot use vector type as SDTCVecEltisVT"); + if (!T.isInteger() && !T.isFloatingPoint()) + PrintFatalError(R->getLoc(), "Must use integer or floating point type " + "as SDTCVecEltisVT"); + } } else if (R->isSubClassOf("SDTCisSameNumEltsAs")) { ConstraintType = SDTCisSameNumEltsAs; x.SDTCisSameNumEltsAs_Info.OtherOperandNum = @@ -998,23 +1403,24 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, unsigned ResNo = 0; // The result number being referenced. TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo); + TypeInfer &TI = TP.getInfer(); switch (ConstraintType) { case SDTCisVT: // Operand must be a particular type. - return NodeToApply->UpdateNodeType(ResNo, x.SDTCisVT_Info.VT, TP); + return NodeToApply->UpdateNodeType(ResNo, VVT, TP); case SDTCisPtrTy: // Operand must be same as target pointer type. return NodeToApply->UpdateNodeType(ResNo, MVT::iPTR, TP); case SDTCisInt: // Require it to be one of the legal integer VTs. - return NodeToApply->getExtType(ResNo).EnforceInteger(TP); + return TI.EnforceInteger(NodeToApply->getExtType(ResNo)); case SDTCisFP: // Require it to be one of the legal fp VTs. 
- return NodeToApply->getExtType(ResNo).EnforceFloatingPoint(TP); + return TI.EnforceFloatingPoint(NodeToApply->getExtType(ResNo)); case SDTCisVec: // Require it to be one of the legal vector VTs. - return NodeToApply->getExtType(ResNo).EnforceVector(TP); + return TI.EnforceVector(NodeToApply->getExtType(ResNo)); case SDTCisSameAs: { unsigned OResNo = 0; TreePatternNode *OtherNode = @@ -1032,36 +1438,35 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, TP.error(N->getOperator()->getName() + " expects a VT operand!"); return false; } - MVT::SimpleValueType VT = - getValueType(static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()); - - EEVT::TypeSet TypeListTmp(VT, TP); + DefInit *DI = static_cast<DefInit*>(NodeToApply->getLeafValue()); + const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); + auto VVT = getValueTypeByHwMode(DI->getDef(), T.getHwModes()); + TypeSetByHwMode TypeListTmp(VVT); unsigned OResNo = 0; TreePatternNode *OtherNode = getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo, OResNo); - return TypeListTmp.EnforceSmallerThan(OtherNode->getExtType(OResNo), TP); + return TI.EnforceSmallerThan(TypeListTmp, OtherNode->getExtType(OResNo)); } case SDTCisOpSmallerThanOp: { unsigned BResNo = 0; TreePatternNode *BigOperand = getOperandNum(x.SDTCisOpSmallerThanOp_Info.BigOperandNum, N, NodeInfo, BResNo); - return NodeToApply->getExtType(ResNo). - EnforceSmallerThan(BigOperand->getExtType(BResNo), TP); + return TI.EnforceSmallerThan(NodeToApply->getExtType(ResNo), + BigOperand->getExtType(BResNo)); } case SDTCisEltOfVec: { unsigned VResNo = 0; TreePatternNode *VecOperand = getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum, N, NodeInfo, VResNo); - // Filter vector types out of VecOperand that don't have the right element // type. - return VecOperand->getExtType(VResNo). - EnforceVectorEltTypeIs(NodeToApply->getExtType(ResNo), TP); + return TI.EnforceVectorEltTypeIs(VecOperand->getExtType(VResNo), + NodeToApply->getExtType(ResNo)); } case SDTCisSubVecOfVec: { unsigned VResNo = 0; @@ -1071,28 +1476,27 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, // Filter vector types out of BigVecOperand that don't have the // right subvector type. - return BigVecOperand->getExtType(VResNo). - EnforceVectorSubVectorTypeIs(NodeToApply->getExtType(ResNo), TP); + return TI.EnforceVectorSubVectorTypeIs(BigVecOperand->getExtType(VResNo), + NodeToApply->getExtType(ResNo)); } case SDTCVecEltisVT: { - return NodeToApply->getExtType(ResNo). - EnforceVectorEltTypeIs(x.SDTCVecEltisVT_Info.VT, TP); + return TI.EnforceVectorEltTypeIs(NodeToApply->getExtType(ResNo), VVT); } case SDTCisSameNumEltsAs: { unsigned OResNo = 0; TreePatternNode *OtherNode = getOperandNum(x.SDTCisSameNumEltsAs_Info.OtherOperandNum, N, NodeInfo, OResNo); - return OtherNode->getExtType(OResNo). - EnforceSameNumElts(NodeToApply->getExtType(ResNo), TP); + return TI.EnforceSameNumElts(OtherNode->getExtType(OResNo), + NodeToApply->getExtType(ResNo)); } case SDTCisSameSizeAs: { unsigned OResNo = 0; TreePatternNode *OtherNode = getOperandNum(x.SDTCisSameSizeAs_Info.OtherOperandNum, N, NodeInfo, OResNo); - return OtherNode->getExtType(OResNo). - EnforceSameSize(NodeToApply->getExtType(ResNo), TP); + return TI.EnforceSameSize(OtherNode->getExtType(OResNo), + NodeToApply->getExtType(ResNo)); } } llvm_unreachable("Invalid ConstraintType!"); @@ -1110,9 +1514,11 @@ bool TreePatternNode::UpdateNodeTypeFromInst(unsigned ResNo, return false; // The Operand class specifies a type directly. 
- if (Operand->isSubClassOf("Operand")) - return UpdateNodeType(ResNo, getValueType(Operand->getValueAsDef("Type")), - TP); + if (Operand->isSubClassOf("Operand")) { + Record *R = Operand->getValueAsDef("Type"); + const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); + return UpdateNodeType(ResNo, getValueTypeByHwMode(R, T.getHwModes()), TP); + } // PointerLikeRegClass has a type that is determined at runtime. if (Operand->isSubClassOf("PointerLikeRegClass")) @@ -1131,11 +1537,53 @@ bool TreePatternNode::UpdateNodeTypeFromInst(unsigned ResNo, return UpdateNodeType(ResNo, Tgt.getRegisterClass(RC).getValueTypes(), TP); } +bool TreePatternNode::ContainsUnresolvedType(TreePattern &TP) const { + for (unsigned i = 0, e = Types.size(); i != e; ++i) + if (!TP.getInfer().isConcrete(Types[i], true)) + return true; + for (unsigned i = 0, e = getNumChildren(); i != e; ++i) + if (getChild(i)->ContainsUnresolvedType(TP)) + return true; + return false; +} + +bool TreePatternNode::hasProperTypeByHwMode() const { + for (const TypeSetByHwMode &S : Types) + if (!S.isDefaultOnly()) + return true; + for (TreePatternNode *C : Children) + if (C->hasProperTypeByHwMode()) + return true; + return false; +} + +bool TreePatternNode::hasPossibleType() const { + for (const TypeSetByHwMode &S : Types) + if (!S.isPossible()) + return false; + for (TreePatternNode *C : Children) + if (!C->hasPossibleType()) + return false; + return true; +} + +bool TreePatternNode::setDefaultMode(unsigned Mode) { + for (TypeSetByHwMode &S : Types) { + S.makeSimple(Mode); + // Check if the selected mode had a type conflict. + if (S.get(DefaultMode).empty()) + return false; + } + for (TreePatternNode *C : Children) + if (!C->setDefaultMode(Mode)) + return false; + return true; +} //===----------------------------------------------------------------------===// // SDNodeInfo implementation // -SDNodeInfo::SDNodeInfo(Record *R) : Def(R) { +SDNodeInfo::SDNodeInfo(Record *R, const CodeGenHwModes &CGH) : Def(R) { EnumName = R->getValueAsString("Opcode"); SDClassName = R->getValueAsString("SDClass"); Record *TypeProfile = R->getValueAsDef("TypeProfile"); @@ -1178,7 +1626,8 @@ SDNodeInfo::SDNodeInfo(Record *R) : Def(R) { // Parse the type constraints. std::vector<Record*> ConstraintList = TypeProfile->getValueAsListOfDefs("Constraints"); - TypeConstraints.assign(ConstraintList.begin(), ConstraintList.end()); + for (Record *R : ConstraintList) + TypeConstraints.emplace_back(R, CGH); } /// getKnownType - If the type constraints on this node imply a fixed type @@ -1198,7 +1647,9 @@ MVT::SimpleValueType SDNodeInfo::getKnownType(unsigned ResNo) const { switch (Constraint.ConstraintType) { default: break; case SDTypeConstraint::SDTCisVT: - return Constraint.x.SDTCisVT_Info.VT; + if (Constraint.VVT.isSimple()) + return Constraint.VVT.getSimple().SimpleTy; + break; case SDTypeConstraint::SDTCisPtrTy: return MVT::iPTR; } @@ -1284,8 +1735,10 @@ void TreePatternNode::print(raw_ostream &OS) const { else OS << '(' << getOperator()->getName(); - for (unsigned i = 0, e = Types.size(); i != e; ++i) - OS << ':' << getExtType(i).getName(); + for (unsigned i = 0, e = Types.size(); i != e; ++i) { + OS << ':'; + getExtType(i).writeToStream(OS); + } if (!isLeaf()) { if (getNumChildren() != 0) { @@ -1368,7 +1821,7 @@ TreePatternNode *TreePatternNode::clone() const { /// RemoveAllTypes - Recursively strip all the types of this tree. void TreePatternNode::RemoveAllTypes() { // Reset to unknown type. 
- std::fill(Types.begin(), Types.end(), EEVT::TypeSet()); + std::fill(Types.begin(), Types.end(), TypeSetByHwMode()); if (isLeaf()) return; for (unsigned i = 0, e = getNumChildren(); i != e; ++i) getChild(i)->RemoveAllTypes(); @@ -1485,18 +1938,20 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { /// When Unnamed is false, return the type of a named DAG operand such as the /// GPR:$src operand above. /// -static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, - bool NotRegisters, - bool Unnamed, - TreePattern &TP) { +static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo, + bool NotRegisters, + bool Unnamed, + TreePattern &TP) { + CodeGenDAGPatterns &CDP = TP.getDAGPatterns(); + // Check to see if this is a register operand. if (R->isSubClassOf("RegisterOperand")) { assert(ResNo == 0 && "Regoperand ref only has one result!"); if (NotRegisters) - return EEVT::TypeSet(); // Unknown. + return TypeSetByHwMode(); // Unknown. Record *RegClass = R->getValueAsDef("RegClass"); const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); - return EEVT::TypeSet(T.getRegisterClass(RegClass).getValueTypes()); + return TypeSetByHwMode(T.getRegisterClass(RegClass).getValueTypes()); } // Check to see if this is a register or a register class. @@ -1505,33 +1960,33 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, // An unnamed register class represents itself as an i32 immediate, for // example on a COPY_TO_REGCLASS instruction. if (Unnamed) - return EEVT::TypeSet(MVT::i32, TP); + return TypeSetByHwMode(MVT::i32); // In a named operand, the register class provides the possible set of // types. if (NotRegisters) - return EEVT::TypeSet(); // Unknown. + return TypeSetByHwMode(); // Unknown. const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); - return EEVT::TypeSet(T.getRegisterClass(R).getValueTypes()); + return TypeSetByHwMode(T.getRegisterClass(R).getValueTypes()); } if (R->isSubClassOf("PatFrag")) { assert(ResNo == 0 && "FIXME: PatFrag with multiple results?"); // Pattern fragment types will be resolved when they are inlined. - return EEVT::TypeSet(); // Unknown. + return TypeSetByHwMode(); // Unknown. } if (R->isSubClassOf("Register")) { assert(ResNo == 0 && "Registers only produce one result!"); if (NotRegisters) - return EEVT::TypeSet(); // Unknown. + return TypeSetByHwMode(); // Unknown. const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); - return EEVT::TypeSet(T.getRegisterVTs(R)); + return TypeSetByHwMode(T.getRegisterVTs(R)); } if (R->isSubClassOf("SubRegIndex")) { assert(ResNo == 0 && "SubRegisterIndices only produce one result!"); - return EEVT::TypeSet(MVT::i32, TP); + return TypeSetByHwMode(MVT::i32); } if (R->isSubClassOf("ValueType")) { @@ -1541,46 +1996,51 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, // (sext_inreg GPR:$src, i16) // ~~~ if (Unnamed) - return EEVT::TypeSet(MVT::Other, TP); + return TypeSetByHwMode(MVT::Other); // With a name, the ValueType simply provides the type of the named // variable. // // (sext_inreg i32:$src, i16) // ~~~~~~~~ if (NotRegisters) - return EEVT::TypeSet(); // Unknown. - return EEVT::TypeSet(getValueType(R), TP); + return TypeSetByHwMode(); // Unknown. + const CodeGenHwModes &CGH = CDP.getTargetInfo().getHwModes(); + return TypeSetByHwMode(getValueTypeByHwMode(R, CGH)); } if (R->isSubClassOf("CondCode")) { assert(ResNo == 0 && "This node only has one result!"); // Using a CondCodeSDNode. 
- return EEVT::TypeSet(MVT::Other, TP); + return TypeSetByHwMode(MVT::Other); } if (R->isSubClassOf("ComplexPattern")) { assert(ResNo == 0 && "FIXME: ComplexPattern with multiple results?"); if (NotRegisters) - return EEVT::TypeSet(); // Unknown. - return EEVT::TypeSet(TP.getDAGPatterns().getComplexPattern(R).getValueType(), - TP); + return TypeSetByHwMode(); // Unknown. + return TypeSetByHwMode(CDP.getComplexPattern(R).getValueType()); } if (R->isSubClassOf("PointerLikeRegClass")) { assert(ResNo == 0 && "Regclass can only have one result!"); - return EEVT::TypeSet(MVT::iPTR, TP); + TypeSetByHwMode VTS(MVT::iPTR); + TP.getInfer().expandOverloads(VTS); + return VTS; } if (R->getName() == "node" || R->getName() == "srcvalue" || R->getName() == "zero_reg") { // Placeholder. - return EEVT::TypeSet(); // Unknown. + return TypeSetByHwMode(); // Unknown. } - if (R->isSubClassOf("Operand")) - return EEVT::TypeSet(getValueType(R->getValueAsDef("Type"))); + if (R->isSubClassOf("Operand")) { + const CodeGenHwModes &CGH = CDP.getTargetInfo().getHwModes(); + Record *T = R->getValueAsDef("Type"); + return TypeSetByHwMode(getValueTypeByHwMode(T, CGH)); + } TP.error("Unknown node flavor used in pattern: " + R->getName()); - return EEVT::TypeSet(MVT::Other, TP); + return TypeSetByHwMode(MVT::Other); } @@ -1722,29 +2182,34 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { assert(Types.size() == 1 && "Invalid IntInit"); // Int inits are always integers. :) - bool MadeChange = Types[0].EnforceInteger(TP); - - if (!Types[0].isConcrete()) - return MadeChange; + bool MadeChange = TP.getInfer().EnforceInteger(Types[0]); - MVT::SimpleValueType VT = getType(0); - if (VT == MVT::iPTR || VT == MVT::iPTRAny) + if (!TP.getInfer().isConcrete(Types[0], false)) return MadeChange; - unsigned Size = MVT(VT).getSizeInBits(); - // Make sure that the value is representable for this type. - if (Size >= 32) return MadeChange; - - // Check that the value doesn't use more bits than we have. It must either - // be a sign- or zero-extended equivalent of the original. - int64_t SignBitAndAbove = II->getValue() >> (Size - 1); - if (SignBitAndAbove == -1 || SignBitAndAbove == 0 || SignBitAndAbove == 1) - return MadeChange; + ValueTypeByHwMode VVT = TP.getInfer().getConcrete(Types[0], false); + for (auto &P : VVT) { + MVT::SimpleValueType VT = P.second.SimpleTy; + if (VT == MVT::iPTR || VT == MVT::iPTRAny) + continue; + unsigned Size = MVT(VT).getSizeInBits(); + // Make sure that the value is representable for this type. + if (Size >= 32) + continue; + // Check that the value doesn't use more bits than we have. It must + // either be a sign- or zero-extended equivalent of the original. 
+ int64_t SignBitAndAbove = II->getValue() >> (Size - 1); + if (SignBitAndAbove == -1 || SignBitAndAbove == 0 || + SignBitAndAbove == 1) + continue; - TP.error("Integer value '" + itostr(II->getValue()) + - "' is out of range for type '" + getEnumName(getType(0)) + "'!"); - return false; + TP.error("Integer value '" + itostr(II->getValue()) + + "' is out of range for type '" + getEnumName(VT) + "'!"); + break; + } + return MadeChange; } + return false; } @@ -1773,7 +2238,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { bool MadeChange = false; for (unsigned i = 0; i < getNumChildren(); ++i) - MadeChange = getChild(i)->ApplyTypeConstraints(TP, NotRegisters); + MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters); return MadeChange; } @@ -1818,9 +2283,10 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { return false; } - bool MadeChange = NI.ApplyTypeConstraints(this, TP); + bool MadeChange = false; for (unsigned i = 0, e = getNumChildren(); i != e; ++i) MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters); + MadeChange |= NI.ApplyTypeConstraints(this, TP); return MadeChange; } @@ -1975,18 +2441,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { } bool MadeChange = getChild(0)->ApplyTypeConstraints(TP, NotRegisters); - - - // If either the output or input of the xform does not have exact - // type info. We assume they must be the same. Otherwise, it is perfectly - // legal to transform from one type to a completely different type. -#if 0 - if (!hasTypeSet() || !getChild(0)->hasTypeSet()) { - bool MadeChange = UpdateNodeType(getChild(0)->getExtType(), TP); - MadeChange |= getChild(0)->UpdateNodeType(getExtType(), TP); - return MadeChange; - } -#endif return MadeChange; } @@ -2050,20 +2504,23 @@ bool TreePatternNode::canPatternMatch(std::string &Reason, TreePattern::TreePattern(Record *TheRec, ListInit *RawPat, bool isInput, CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), - isInputPattern(isInput), HasError(false) { + isInputPattern(isInput), HasError(false), + Infer(*this) { for (Init *I : RawPat->getValues()) Trees.push_back(ParseTreePattern(I, "")); } TreePattern::TreePattern(Record *TheRec, DagInit *Pat, bool isInput, CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), - isInputPattern(isInput), HasError(false) { + isInputPattern(isInput), HasError(false), + Infer(*this) { Trees.push_back(ParseTreePattern(Pat, "")); } TreePattern::TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput, CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), - isInputPattern(isInput), HasError(false) { + isInputPattern(isInput), HasError(false), + Infer(*this) { Trees.push_back(Pat); } @@ -2158,7 +2615,8 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){ // Apply the type cast. assert(New->getNumTypes() == 1 && "FIXME: Unhandled"); - New->UpdateNodeType(0, getValueType(Operator), *this); + const CodeGenHwModes &CGH = getDAGPatterns().getTargetInfo().getHwModes(); + New->UpdateNodeType(0, getValueTypeByHwMode(Operator, CGH), *this); if (!OpName.empty()) error("ValueType cast should not have a name!"); @@ -2273,7 +2731,7 @@ static bool SimplifyTree(TreePatternNode *&N) { // If we have a bitconvert with a resolved type and if the source and // destination types are the same, then the bitconvert is useless, remove it. 
if (N->getOperator()->getName() == "bitconvert" && - N->getExtType(0).isConcrete() && + N->getExtType(0).isValueTypeByHwMode(false) && N->getExtType(0) == N->getChild(0)->getExtType(0) && N->getName().empty()) { N = N->getChild(0); @@ -2304,7 +2762,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) { bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (TreePatternNode *Tree : Trees) { + for (TreePatternNode *&Tree : Trees) { MadeChange |= Tree->ApplyTypeConstraints(*this, false); MadeChange |= SimplifyTree(Tree); } @@ -2364,7 +2822,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) { bool HasUnresolvedTypes = false; for (const TreePatternNode *Tree : Trees) - HasUnresolvedTypes |= Tree->ContainsUnresolvedType(); + HasUnresolvedTypes |= Tree->ContainsUnresolvedType(*this); return !HasUnresolvedTypes; } @@ -2396,8 +2854,10 @@ void TreePattern::dump() const { print(errs()); } // CodeGenDAGPatterns implementation // -CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) : - Records(R), Target(R) { +CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R, + PatternRewriterFn PatternRewriter) + : Records(R), Target(R), LegalVTS(Target.getLegalValueTypes()), + PatternRewriter(PatternRewriter) { Intrinsics = CodeGenIntrinsicTable(Records, false); TgtIntrinsics = CodeGenIntrinsicTable(Records, true); @@ -2410,6 +2870,11 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) : ParsePatternFragments(/*OutFrags*/true); ParsePatterns(); + // Break patterns with parameterized types into a series of patterns, + // where each one has a fixed type and is predicated on the conditions + // of the associated HW mode. + ExpandHwModeBasedTypes(); + // Generate variants. For example, commutative patterns can match // multiple ways. Add them to PatternsToMatch as well. GenerateVariants(); @@ -2434,8 +2899,11 @@ Record *CodeGenDAGPatterns::getSDNodeNamed(const std::string &Name) const { // Parse all of the SDNode definitions for the target, populating SDNodes. void CodeGenDAGPatterns::ParseNodeInfo() { std::vector<Record*> Nodes = Records.getAllDerivedDefinitions("SDNode"); + const CodeGenHwModes &CGH = getTargetInfo().getHwModes(); + while (!Nodes.empty()) { - SDNodes.insert(std::make_pair(Nodes.back(), Nodes.back())); + Record *R = Nodes.back(); + SDNodes.insert(std::make_pair(R, SDNodeInfo(R, CGH))); Nodes.pop_back(); } @@ -2489,7 +2957,10 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) { // Validate the argument list, converting it to set, to discard duplicates. std::vector<std::string> &Args = P->getArgList(); - std::set<std::string> OperandsSet(Args.begin(), Args.end()); + // Copy the args so we can take StringRefs to them. + auto ArgsCopy = Args; + SmallDenseSet<StringRef, 4> OperandsSet; + OperandsSet.insert(ArgsCopy.begin(), ArgsCopy.end()); if (OperandsSet.count("")) P->error("Cannot have unnamed 'node' values in pattern fragment!"); @@ -2589,7 +3060,7 @@ void CodeGenDAGPatterns::ParseDefaultOperands() { while (TPN->ApplyTypeConstraints(P, false)) /* Resolve all types */; - if (TPN->ContainsUnresolvedType()) { + if (TPN->ContainsUnresolvedType(P)) { PrintFatalError("Value #" + Twine(i) + " of OperandWithDefaultOps '" + DefaultOps[i]->getName() + "' doesn't have a concrete type!"); @@ -2981,17 +3452,20 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern( // Verify that the top-level forms in the instruction are of void type, and // fill in the InstResults map. 
+ SmallString<32> TypesString; for (unsigned j = 0, e = I->getNumTrees(); j != e; ++j) { + TypesString.clear(); TreePatternNode *Pat = I->getTree(j); if (Pat->getNumTypes() != 0) { - std::string Types; + raw_svector_ostream OS(TypesString); for (unsigned k = 0, ke = Pat->getNumTypes(); k != ke; ++k) { if (k > 0) - Types += ", "; - Types += Pat->getExtType(k).getName(); + OS << ", "; + Pat->getExtType(k).writeToStream(OS); } I->error("Top-level forms in instruction pattern should have" - " void types, has types " + Types); + " void types, has types " + + OS.str()); } // Find inputs and outputs, and verify the structure of the uses/defs. @@ -3174,6 +3648,8 @@ void CodeGenDAGPatterns::ParseInstructions() { TreePattern *I = TheInst.getPattern(); if (!I) continue; // No pattern. + if (PatternRewriter) + PatternRewriter(I); // FIXME: Assume only the first tree is the pattern. The others are clobber // nodes. TreePatternNode *Pattern = I->getTree(0); @@ -3186,14 +3662,13 @@ void CodeGenDAGPatterns::ParseInstructions() { } Record *Instr = Entry.first; - AddPatternToMatch(I, - PatternToMatch(Instr, - Instr->getValueAsListInit("Predicates"), - SrcPattern, - TheInst.getResultPattern(), - TheInst.getImpResults(), - Instr->getValueAsInt("AddedComplexity"), - Instr->getID())); + ListInit *Preds = Instr->getValueAsListInit("Predicates"); + int Complexity = Instr->getValueAsInt("AddedComplexity"); + AddPatternToMatch( + I, + PatternToMatch(Instr, makePredList(Preds), SrcPattern, + TheInst.getResultPattern(), TheInst.getImpResults(), + Complexity, Instr->getID())); } } @@ -3219,6 +3694,20 @@ static void FindNames(const TreePatternNode *P, } } +std::vector<Predicate> CodeGenDAGPatterns::makePredList(ListInit *L) { + std::vector<Predicate> Preds; + for (Init *I : L->getValues()) { + if (DefInit *Pred = dyn_cast<DefInit>(I)) + Preds.push_back(Pred->getDef()); + else + llvm_unreachable("Non-def on the list"); + } + + // Sort so that different orders get canonicalized to the same string. + std::sort(Preds.begin(), Preds.end()); + return Preds; +} + void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern, PatternToMatch &&PTM) { // Do some sanity checking on the pattern we're about to match. @@ -3262,8 +3751,6 @@ void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern, PatternsToMatch.push_back(std::move(PTM)); } - - void CodeGenDAGPatterns::InferInstructionFlags() { ArrayRef<const CodeGenInstruction*> Instructions = Target.getInstructionsByEnumValue(); @@ -3425,12 +3912,13 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) { // If this type is already concrete or completely unknown we can't do // anything. + TypeInfer &TI = TP.getInfer(); for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i) { - if (N->getExtType(i).isCompletelyUnknown() || N->getExtType(i).isConcrete()) + if (N->getExtType(i).empty() || TI.isConcrete(N->getExtType(i), false)) continue; - // Otherwise, force its type to the first possibility (an arbitrary choice). - if (N->getExtType(i).MergeInTypeInfo(N->getExtType(i).getTypeList()[0], TP)) + // Otherwise, force its type to an arbitrary choice. 
+ if (TI.forceArbitrary(N->getExtType(i))) return true; } @@ -3551,15 +4039,156 @@ void CodeGenDAGPatterns::ParsePatterns() { TreePattern Temp(Result.getRecord(), DstPattern, false, *this); Temp.InferAllTypes(); - AddPatternToMatch( - Pattern, - PatternToMatch( - CurPattern, CurPattern->getValueAsListInit("Predicates"), - Pattern->getTree(0), Temp.getOnlyTree(), std::move(InstImpResults), - CurPattern->getValueAsInt("AddedComplexity"), CurPattern->getID())); + // A pattern may end up with an "impossible" type, i.e. a situation + // where all types have been eliminated for some node in this pattern. + // This could occur for intrinsics that only make sense for a specific + // value type, and use a specific register class. If, for some mode, + // that register class does not accept that type, the type inference + // will lead to a contradiction, which is not an error however, but + // a sign that this pattern will simply never match. + if (Pattern->getTree(0)->hasPossibleType() && + Temp.getOnlyTree()->hasPossibleType()) { + ListInit *Preds = CurPattern->getValueAsListInit("Predicates"); + int Complexity = CurPattern->getValueAsInt("AddedComplexity"); + if (PatternRewriter) + PatternRewriter(Pattern); + AddPatternToMatch( + Pattern, + PatternToMatch( + CurPattern, makePredList(Preds), Pattern->getTree(0), + Temp.getOnlyTree(), std::move(InstImpResults), Complexity, + CurPattern->getID())); + } } } +static void collectModes(std::set<unsigned> &Modes, const TreePatternNode *N) { + for (const TypeSetByHwMode &VTS : N->getExtTypes()) + for (const auto &I : VTS) + Modes.insert(I.first); + + for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) + collectModes(Modes, N->getChild(i)); +} + +void CodeGenDAGPatterns::ExpandHwModeBasedTypes() { + const CodeGenHwModes &CGH = getTargetInfo().getHwModes(); + std::map<unsigned,std::vector<Predicate>> ModeChecks; + std::vector<PatternToMatch> Copy = PatternsToMatch; + PatternsToMatch.clear(); + + auto AppendPattern = [this,&ModeChecks](PatternToMatch &P, unsigned Mode) { + TreePatternNode *NewSrc = P.SrcPattern->clone(); + TreePatternNode *NewDst = P.DstPattern->clone(); + if (!NewSrc->setDefaultMode(Mode) || !NewDst->setDefaultMode(Mode)) { + delete NewSrc; + delete NewDst; + return; + } + + std::vector<Predicate> Preds = P.Predicates; + const std::vector<Predicate> &MC = ModeChecks[Mode]; + Preds.insert(Preds.end(), MC.begin(), MC.end()); + PatternsToMatch.emplace_back(P.getSrcRecord(), Preds, NewSrc, NewDst, + P.getDstRegs(), P.getAddedComplexity(), + Record::getNewUID(), Mode); + }; + + for (PatternToMatch &P : Copy) { + TreePatternNode *SrcP = nullptr, *DstP = nullptr; + if (P.SrcPattern->hasProperTypeByHwMode()) + SrcP = P.SrcPattern; + if (P.DstPattern->hasProperTypeByHwMode()) + DstP = P.DstPattern; + if (!SrcP && !DstP) { + PatternsToMatch.push_back(P); + continue; + } + + std::set<unsigned> Modes; + if (SrcP) + collectModes(Modes, SrcP); + if (DstP) + collectModes(Modes, DstP); + + // The predicate for the default mode needs to be constructed for each + // pattern separately. + // Since not all modes must be present in each pattern, if a mode m is + // absent, then there is no point in constructing a check for m. If such + // a check was created, it would be equivalent to checking the default + // mode, except not all modes' predicates would be a part of the checking + // code. The subsequently generated check for the default mode would then + // have the exact same patterns, but a different predicate code. 
To avoid + // duplicated patterns with different predicate checks, construct the + // default check as a negation of all predicates that are actually present + // in the source/destination patterns. + std::vector<Predicate> DefaultPred; + + for (unsigned M : Modes) { + if (M == DefaultMode) + continue; + if (ModeChecks.find(M) != ModeChecks.end()) + continue; + + // Fill the map entry for this mode. + const HwMode &HM = CGH.getMode(M); + ModeChecks[M].emplace_back(Predicate(HM.Features, true)); + + // Add negations of the HM's predicates to the default predicate. + DefaultPred.emplace_back(Predicate(HM.Features, false)); + } + + for (unsigned M : Modes) { + if (M == DefaultMode) + continue; + AppendPattern(P, M); + } + + bool HasDefault = Modes.count(DefaultMode); + if (HasDefault) + AppendPattern(P, DefaultMode); + } +} + +/// Dependent variable map for CodeGenDAGPattern variant generation +typedef StringMap<int> DepVarMap; + +static void FindDepVarsOf(TreePatternNode *N, DepVarMap &DepMap) { + if (N->isLeaf()) { + if (N->hasName() && isa<DefInit>(N->getLeafValue())) + DepMap[N->getName()]++; + } else { + for (size_t i = 0, e = N->getNumChildren(); i != e; ++i) + FindDepVarsOf(N->getChild(i), DepMap); + } +} + +/// Find dependent variables within child patterns +static void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) { + DepVarMap depcounts; + FindDepVarsOf(N, depcounts); + for (const auto &Pair : depcounts) { + if (Pair.getValue() > 1) + DepVars.insert(Pair.getKey()); + } +} + +#ifndef NDEBUG +/// Dump the dependent variable set: +static void DumpDepVars(MultipleUseVarSet &DepVars) { + if (DepVars.empty()) { + DEBUG(errs() << "<empty set>"); + } else { + DEBUG(errs() << "[ "); + for (const auto &DepVar : DepVars) { + DEBUG(errs() << DepVar.getKey() << " "); + } + DEBUG(errs() << "]"); + } +} +#endif + + /// CombineChildVariants - Given a bunch of permutations of each child of the /// 'operator' node, put them together in all possible ways. static void CombineChildVariants(TreePatternNode *Orig, @@ -3744,7 +4373,7 @@ static void GenerateVariantsOf(TreePatternNode *N, // If this node is commutative, consider the commuted order. bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP); if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) { - assert((N->getNumChildren()==2 || isCommIntrinsic) && + assert((N->getNumChildren()>=2 || isCommIntrinsic) && "Commutative but doesn't have 2 children!"); // Don't count children which are actually register references. 
unsigned NC = 0; @@ -3772,9 +4401,14 @@ static void GenerateVariantsOf(TreePatternNode *N, for (unsigned i = 3; i != NC; ++i) Variants.push_back(ChildVariants[i]); CombineChildVariants(N, Variants, OutVariants, CDP, DepVars); - } else if (NC == 2) - CombineChildVariants(N, ChildVariants[1], ChildVariants[0], - OutVariants, CDP, DepVars); + } else if (NC == N->getNumChildren()) { + std::vector<std::vector<TreePatternNode*> > Variants; + Variants.push_back(ChildVariants[1]); + Variants.push_back(ChildVariants[0]); + for (unsigned i = 2; i != NC; ++i) + Variants.push_back(ChildVariants[i]); + CombineChildVariants(N, Variants, OutVariants, CDP, DepVars); + } } } diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 8b3e19142370..afbcb10a4b66 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -15,163 +15,334 @@ #ifndef LLVM_UTILS_TABLEGEN_CODEGENDAGPATTERNS_H #define LLVM_UTILS_TABLEGEN_CODEGENDAGPATTERNS_H +#include "CodeGenHwModes.h" #include "CodeGenIntrinsics.h" #include "CodeGenTarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include <algorithm> +#include <array> +#include <functional> #include <map> #include <set> #include <vector> namespace llvm { - class Record; - class Init; - class ListInit; - class DagInit; - class SDNodeInfo; - class TreePattern; - class TreePatternNode; - class CodeGenDAGPatterns; - class ComplexPattern; - -/// EEVT::DAGISelGenValueType - These are some extended forms of -/// MVT::SimpleValueType that we use as lattice values during type inference. -/// The existing MVT iAny, fAny and vAny types suffice to represent -/// arbitrary integer, floating-point, and vector types, so only an unknown -/// value is needed. -namespace EEVT { - /// TypeSet - This is either empty if it's completely unknown, or holds a set - /// of types. It is used during type inference because register classes can - /// have multiple possible types and we don't know which one they get until - /// type inference is complete. - /// - /// TypeSet can have three states: - /// Vector is empty: The type is completely unknown, it can be any valid - /// target type. - /// Vector has multiple constrained types: (e.g. v4i32 + v4f32) it is one - /// of those types only. - /// Vector has one concrete type: The type is completely known. - /// - class TypeSet { - SmallVector<MVT::SimpleValueType, 4> TypeVec; - public: - TypeSet() {} - TypeSet(MVT::SimpleValueType VT, TreePattern &TP); - TypeSet(ArrayRef<MVT::SimpleValueType> VTList); - - bool isCompletelyUnknown() const { return TypeVec.empty(); } - - bool isConcrete() const { - if (TypeVec.size() != 1) return false; - unsigned char T = TypeVec[0]; (void)T; - assert(T < MVT::LAST_VALUETYPE || T == MVT::iPTR || T == MVT::iPTRAny); - return true; - } - MVT::SimpleValueType getConcrete() const { - assert(isConcrete() && "Type isn't concrete yet"); - return (MVT::SimpleValueType)TypeVec[0]; - } +class Record; +class Init; +class ListInit; +class DagInit; +class SDNodeInfo; +class TreePattern; +class TreePatternNode; +class CodeGenDAGPatterns; +class ComplexPattern; + +/// This represents a set of MVTs. Since the underlying type for the MVT +/// is uint8_t, there are at most 256 values. To reduce the number of memory +/// allocations and deallocations, represent the set as a sequence of bits. 
+/// To reduce the allocations even further, make MachineValueTypeSet own +/// the storage and use std::array as the bit container. +struct MachineValueTypeSet { + static_assert(std::is_same<std::underlying_type<MVT::SimpleValueType>::type, + uint8_t>::value, + "Change uint8_t here to the SimpleValueType's type"); + static unsigned constexpr Capacity = std::numeric_limits<uint8_t>::max()+1; + using WordType = uint64_t; + static unsigned constexpr WordWidth = CHAR_BIT*sizeof(WordType); + static unsigned constexpr NumWords = Capacity/WordWidth; + static_assert(NumWords*WordWidth == Capacity, + "Capacity should be a multiple of WordWidth"); + + LLVM_ATTRIBUTE_ALWAYS_INLINE + MachineValueTypeSet() { + clear(); + } + + LLVM_ATTRIBUTE_ALWAYS_INLINE + unsigned size() const { + unsigned Count = 0; + for (WordType W : Words) + Count += countPopulation(W); + return Count; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + void clear() { + std::memset(Words.data(), 0, NumWords*sizeof(WordType)); + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool empty() const { + for (WordType W : Words) + if (W != 0) + return false; + return true; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + unsigned count(MVT T) const { + return (Words[T.SimpleTy / WordWidth] >> (T.SimpleTy % WordWidth)) & 1; + } + std::pair<MachineValueTypeSet&,bool> insert(MVT T) { + bool V = count(T.SimpleTy); + Words[T.SimpleTy / WordWidth] |= WordType(1) << (T.SimpleTy % WordWidth); + return {*this, V}; + } + MachineValueTypeSet &insert(const MachineValueTypeSet &S) { + for (unsigned i = 0; i != NumWords; ++i) + Words[i] |= S.Words[i]; + return *this; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + void erase(MVT T) { + Words[T.SimpleTy / WordWidth] &= ~(WordType(1) << (T.SimpleTy % WordWidth)); + } - bool isDynamicallyResolved() const { - return getConcrete() == MVT::iPTR || getConcrete() == MVT::iPTRAny; + struct const_iterator { + // Some implementations of the C++ library require these traits to be + // defined. + using iterator_category = std::forward_iterator_tag; + using value_type = MVT; + using difference_type = ptrdiff_t; + using pointer = const MVT*; + using reference = const MVT&; + + LLVM_ATTRIBUTE_ALWAYS_INLINE + MVT operator*() const { + assert(Pos != Capacity); + return MVT::SimpleValueType(Pos); + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator(const MachineValueTypeSet *S, bool End) : Set(S) { + Pos = End ? Capacity : find_from_pos(0); + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator &operator++() { + assert(Pos != Capacity); + Pos = find_from_pos(Pos+1); + return *this; } - const SmallVectorImpl<MVT::SimpleValueType> &getTypeList() const { - assert(!TypeVec.empty() && "Not a type list!"); - return TypeVec; + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool operator==(const const_iterator &It) const { + return Set == It.Set && Pos == It.Pos; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool operator!=(const const_iterator &It) const { + return !operator==(It); } - bool isVoid() const { - return TypeVec.size() == 1 && TypeVec[0] == MVT::isVoid; + private: + unsigned find_from_pos(unsigned P) const { + unsigned SkipWords = P / WordWidth; + unsigned SkipBits = P % WordWidth; + unsigned Count = SkipWords * WordWidth; + + // If P is in the middle of a word, process it manually here, because + // the trailing bits need to be masked off to use findFirstSet. 
+ if (SkipBits != 0) { + WordType W = Set->Words[SkipWords]; + W &= maskLeadingOnes<WordType>(WordWidth-SkipBits); + if (W != 0) + return Count + findFirstSet(W); + Count += WordWidth; + SkipWords++; + } + + for (unsigned i = SkipWords; i != NumWords; ++i) { + WordType W = Set->Words[i]; + if (W != 0) + return Count + findFirstSet(W); + Count += WordWidth; + } + return Capacity; } - /// hasIntegerTypes - Return true if this TypeSet contains any integer value - /// types. - bool hasIntegerTypes() const; + const MachineValueTypeSet *Set; + unsigned Pos; + }; - /// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or - /// a floating point value type. - bool hasFloatingPointTypes() const; + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator begin() const { return const_iterator(this, false); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator end() const { return const_iterator(this, true); } - /// hasScalarTypes - Return true if this TypeSet contains a scalar value - /// type. - bool hasScalarTypes() const; + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool operator==(const MachineValueTypeSet &S) const { + return Words == S.Words; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool operator!=(const MachineValueTypeSet &S) const { + return !operator==(S); + } - /// hasVectorTypes - Return true if this TypeSet contains a vector value - /// type. - bool hasVectorTypes() const; +private: + friend struct const_iterator; + std::array<WordType,NumWords> Words; +}; - /// getName() - Return this TypeSet as a string. - std::string getName() const; +struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> { + using SetType = MachineValueTypeSet; + + TypeSetByHwMode() = default; + TypeSetByHwMode(const TypeSetByHwMode &VTS) = default; + TypeSetByHwMode(MVT::SimpleValueType VT) + : TypeSetByHwMode(ValueTypeByHwMode(VT)) {} + TypeSetByHwMode(ValueTypeByHwMode VT) + : TypeSetByHwMode(ArrayRef<ValueTypeByHwMode>(&VT, 1)) {} + TypeSetByHwMode(ArrayRef<ValueTypeByHwMode> VTList); + + SetType &getOrCreate(unsigned Mode) { + if (hasMode(Mode)) + return get(Mode); + return Map.insert({Mode,SetType()}).first->second; + } - /// MergeInTypeInfo - This merges in type information from the specified - /// argument. If 'this' changes, it returns true. If the two types are - /// contradictory (e.g. merge f32 into i32) then this flags an error. - bool MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP); + bool isValueTypeByHwMode(bool AllowEmpty) const; + ValueTypeByHwMode getValueTypeByHwMode() const; - bool MergeInTypeInfo(MVT::SimpleValueType InVT, TreePattern &TP) { - return MergeInTypeInfo(EEVT::TypeSet(InVT, TP), TP); - } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool isMachineValueType() const { + return isDefaultOnly() && Map.begin()->second.size() == 1; + } - /// Force this type list to only contain integer types. - bool EnforceInteger(TreePattern &TP); + LLVM_ATTRIBUTE_ALWAYS_INLINE + MVT getMachineValueType() const { + assert(isMachineValueType()); + return *Map.begin()->second.begin(); + } - /// Force this type list to only contain floating point types. - bool EnforceFloatingPoint(TreePattern &TP); + bool isPossible() const; - /// EnforceScalar - Remove all vector types from this type list. - bool EnforceScalar(TreePattern &TP); + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool isDefaultOnly() const { + return Map.size() == 1 && Map.begin()->first == DefaultMode; + } - /// EnforceVector - Remove all non-vector types from this type list. 
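As an aside, here is a minimal usage sketch of the MachineValueTypeSet bit-set introduced above. It is not part of the patch; it assumes compilation inside this TableGen utility, where MVT and <cassert> are already available, and only exercises the members shown in the struct definition:

    MachineValueTypeSet S;
    S.insert(MVT::i32);                 // sets the bit for i32
    S.insert(MVT::f64);                 // sets the bit for f64
    assert(S.count(MVT::i32) && S.size() == 2);
    for (MVT T : S)                     // const_iterator visits set bits in
      (void)T;                          // SimpleValueType order
    S.erase(MVT::f64);                  // clears the bit again
    assert(!S.count(MVT::f64) && S.size() == 1);

Because the set is a fixed 256-bit std::array of words, insert, erase and count are constant-time bit operations and perform no heap allocation.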
- bool EnforceVector(TreePattern &TP); + bool insert(const ValueTypeByHwMode &VVT); + bool constrain(const TypeSetByHwMode &VTS); + template <typename Predicate> bool constrain(Predicate P); + template <typename Predicate> + bool assign_if(const TypeSetByHwMode &VTS, Predicate P); - /// EnforceSmallerThan - 'this' must be a smaller VT than Other. Update - /// this an other based on this information. - bool EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP); + void writeToStream(raw_ostream &OS) const; + static void writeToStream(const SetType &S, raw_ostream &OS); - /// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type - /// whose element is VT. - bool EnforceVectorEltTypeIs(EEVT::TypeSet &VT, TreePattern &TP); + bool operator==(const TypeSetByHwMode &VTS) const; + bool operator!=(const TypeSetByHwMode &VTS) const { return !(*this == VTS); } - /// EnforceVectorEltTypeIs - 'this' is now constrained to be a vector type - /// whose element is VT. - bool EnforceVectorEltTypeIs(MVT::SimpleValueType VT, TreePattern &TP); + void dump() const; + void validate() const; - /// EnforceVectorSubVectorTypeIs - 'this' is now constrained to - /// be a vector type VT. - bool EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VT, TreePattern &TP); +private: + /// Intersect two sets. Return true if anything has changed. + bool intersect(SetType &Out, const SetType &In); +}; - /// EnforceSameNumElts - If VTOperand is a scalar, then 'this' is a scalar. - /// If VTOperand is a vector, then 'this' must have the same number of - /// elements. - bool EnforceSameNumElts(EEVT::TypeSet &VT, TreePattern &TP); +raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T); - /// EnforceSameSize - 'this' is now constrained to be the same size as VT. - bool EnforceSameSize(EEVT::TypeSet &VT, TreePattern &TP); +struct TypeInfer { + TypeInfer(TreePattern &T) : TP(T), ForceMode(0) {} - bool operator!=(const TypeSet &RHS) const { return TypeVec != RHS.TypeVec; } - bool operator==(const TypeSet &RHS) const { return TypeVec == RHS.TypeVec; } + bool isConcrete(const TypeSetByHwMode &VTS, bool AllowEmpty) const { + return VTS.isValueTypeByHwMode(AllowEmpty); + } + ValueTypeByHwMode getConcrete(const TypeSetByHwMode &VTS, + bool AllowEmpty) const { + assert(VTS.isValueTypeByHwMode(AllowEmpty)); + return VTS.getValueTypeByHwMode(); + } - private: - /// FillWithPossibleTypes - Set to all legal types and return true, only - /// valid on completely unknown type sets. If Pred is non-null, only MVTs - /// that pass the predicate are added. - bool FillWithPossibleTypes(TreePattern &TP, - bool (*Pred)(MVT::SimpleValueType) = nullptr, - const char *PredicateName = nullptr); + /// The protocol in the following functions (Merge*, force*, Enforce*, + /// expand*) is to return "true" if a change has been made, "false" + /// otherwise. + + bool MergeInTypeInfo(TypeSetByHwMode &Out, const TypeSetByHwMode &In); + bool MergeInTypeInfo(TypeSetByHwMode &Out, MVT::SimpleValueType InVT) { + return MergeInTypeInfo(Out, TypeSetByHwMode(InVT)); + } + bool MergeInTypeInfo(TypeSetByHwMode &Out, ValueTypeByHwMode InVT) { + return MergeInTypeInfo(Out, TypeSetByHwMode(InVT)); + } + + /// Reduce the set \p Out to have at most one element for each mode. + bool forceArbitrary(TypeSetByHwMode &Out); + + /// The following four functions ensure that upon return the set \p Out + /// will only contain types of the specified kind: integer, floating-point, + /// scalar, or vector. 
+ /// If \p Out is empty, all legal types of the specified kind will be added + /// to it. Otherwise, all types that are not of the specified kind will be + /// removed from \p Out. + bool EnforceInteger(TypeSetByHwMode &Out); + bool EnforceFloatingPoint(TypeSetByHwMode &Out); + bool EnforceScalar(TypeSetByHwMode &Out); + bool EnforceVector(TypeSetByHwMode &Out); + + /// If \p Out is empty, fill it with all legal types. Otherwise, leave it + /// unchanged. + bool EnforceAny(TypeSetByHwMode &Out); + /// Make sure that for each type in \p Small, there exists a larger type + /// in \p Big. + bool EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big); + /// 1. Ensure that for each type T in \p Vec, T is a vector type, and that + /// for each type U in \p Elem, U is a scalar type. + /// 2. Ensure that for each (scalar) type U in \p Elem, there exists a + /// (vector) type T in \p Vec, such that U is the element type of T. + bool EnforceVectorEltTypeIs(TypeSetByHwMode &Vec, TypeSetByHwMode &Elem); + bool EnforceVectorEltTypeIs(TypeSetByHwMode &Vec, + const ValueTypeByHwMode &VVT); + /// Ensure that for each type T in \p Sub, T is a vector type, and there + /// exists a type U in \p Vec such that U is a vector type with the same + /// element type as T and at least as many elements as T. + bool EnforceVectorSubVectorTypeIs(TypeSetByHwMode &Vec, + TypeSetByHwMode &Sub); + /// 1. Ensure that \p V has a scalar type iff \p W has a scalar type. + /// 2. Ensure that for each vector type T in \p V, there exists a vector + /// type U in \p W, such that T and U have the same number of elements. + /// 3. Ensure that for each vector type U in \p W, there exists a vector + /// type T in \p V, such that T and U have the same number of elements + /// (reverse of 2). + bool EnforceSameNumElts(TypeSetByHwMode &V, TypeSetByHwMode &W); + /// 1. Ensure that for each type T in \p A, there exists a type U in \p B, + /// such that T and U have equal size in bits. + /// 2. Ensure that for each type U in \p B, there exists a type T in \p A + /// such that T and U have equal size in bits (reverse of 1). + bool EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B); + + /// For each overloaded type (i.e. of form *Any), replace it with the + /// corresponding subset of legal, specific types. + void expandOverloads(TypeSetByHwMode &VTS); + void expandOverloads(TypeSetByHwMode::SetType &Out, + const TypeSetByHwMode::SetType &Legal); + + struct ValidateOnExit { + ValidateOnExit(TypeSetByHwMode &T) : VTS(T) {} + ~ValidateOnExit() { VTS.validate(); } + TypeSetByHwMode &VTS; }; -} + + TreePattern &TP; + unsigned ForceMode; // Mode to use when set. + bool CodeGen = false; // Set during generation of matcher code. + +private: + TypeSetByHwMode getLegalTypes(); + + /// Cached legal types. + bool LegalTypesCached = false; + TypeSetByHwMode::SetType LegalCache = {}; +}; /// Set type used to track multiply used variables in patterns -typedef std::set<std::string> MultipleUseVarSet; +typedef StringSet<> MultipleUseVarSet; /// SDTypeConstraint - This is a discriminated union of constraints, /// corresponding to the SDTypeConstraint tablegen class in Target.td. struct SDTypeConstraint { - SDTypeConstraint(Record *R); + SDTypeConstraint(Record *R, const CodeGenHwModes &CGH); unsigned OperandNo; // The operand # this constraint applies to. enum { @@ -182,9 +353,6 @@ struct SDTypeConstraint { union { // The discriminated union. 
struct { - MVT::SimpleValueType VT; - } SDTCisVT_Info; - struct { unsigned OtherOperandNum; } SDTCisSameAs_Info; struct { @@ -200,9 +368,6 @@ struct SDTypeConstraint { unsigned OtherOperandNum; } SDTCisSubVecOfVec_Info; struct { - MVT::SimpleValueType VT; - } SDTCVecEltisVT_Info; - struct { unsigned OtherOperandNum; } SDTCisSameNumEltsAs_Info; struct { @@ -210,6 +375,10 @@ struct SDTypeConstraint { } SDTCisSameSizeAs_Info; } x; + // The VT for SDTCisVT and SDTCVecEltisVT. + // Must not be in the union because it has a non-trivial destructor. + ValueTypeByHwMode VVT; + /// ApplyTypeConstraint - Given a node in a pattern, apply this type /// constraint to the nodes operands. This returns true if it makes a /// change, false otherwise. If a type contradiction is found, an error @@ -230,7 +399,8 @@ class SDNodeInfo { int NumOperands; std::vector<SDTypeConstraint> TypeConstraints; public: - SDNodeInfo(Record *R); // Parse the specified record. + // Parse the specified record. + SDNodeInfo(Record *R, const CodeGenHwModes &CGH); unsigned getNumResults() const { return NumResults; } @@ -258,14 +428,9 @@ public: /// constraints for this node to the operands of the node. This returns /// true if it makes a change, false otherwise. If a type contradiction is /// found, an error is flagged. - bool ApplyTypeConstraints(TreePatternNode *N, TreePattern &TP) const { - bool MadeChange = false; - for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i) - MadeChange |= TypeConstraints[i].ApplyTypeConstraint(N, *this, TP); - return MadeChange; - } + bool ApplyTypeConstraints(TreePatternNode *N, TreePattern &TP) const; }; - + /// TreePredicateFn - This is an abstraction that represents the predicates on /// a PatFrag node. This is a simple one-word wrapper around a pointer to /// provide nice accessors. @@ -277,14 +442,14 @@ public: /// TreePredicateFn constructor. Here 'N' is a subclass of PatFrag. TreePredicateFn(TreePattern *N); - + TreePattern *getOrigPatFragRecord() const { return PatFragRec; } - + /// isAlwaysTrue - Return true if this is a noop predicate. bool isAlwaysTrue() const; - - bool isImmediatePattern() const { return !getImmCode().empty(); } - + + bool isImmediatePattern() const { return hasImmCode(); } + /// getImmediatePredicateCode - Return the code that evaluates this pattern if /// this is an immediate predicate. It is an error to call this on a /// non-immediate pattern. @@ -293,8 +458,7 @@ public: assert(!Result.empty() && "Isn't an immediate pattern!"); return Result; } - - + bool operator==(const TreePredicateFn &RHS) const { return PatFragRec == RHS.PatFragRec; } @@ -304,18 +468,83 @@ public: /// Return the name to use in the generated code to reference this, this is /// "Predicate_foo" if from a pattern fragment "foo". std::string getFnName() const; - + /// getCodeToRunOnSDNode - Return the code for the function body that /// evaluates this predicate. The argument is expected to be in "Node", /// not N. This handles casting and conversion to a concrete node type as /// appropriate. std::string getCodeToRunOnSDNode() const; - + + /// Get the data type of the argument to getImmediatePredicateCode(). + StringRef getImmType() const; + + /// Get a string that describes the type returned by getImmType() but is + /// usable as part of an identifier. + StringRef getImmTypeIdentifier() const; + + // Is the desired predefined predicate for a load? + bool isLoad() const; + // Is the desired predefined predicate for a store? 
+ bool isStore() const; + // Is the desired predefined predicate for an atomic? + bool isAtomic() const; + + /// Is this predicate the predefined unindexed load predicate? + /// Is this predicate the predefined unindexed store predicate? + bool isUnindexed() const; + /// Is this predicate the predefined non-extending load predicate? + bool isNonExtLoad() const; + /// Is this predicate the predefined any-extend load predicate? + bool isAnyExtLoad() const; + /// Is this predicate the predefined sign-extend load predicate? + bool isSignExtLoad() const; + /// Is this predicate the predefined zero-extend load predicate? + bool isZeroExtLoad() const; + /// Is this predicate the predefined non-truncating store predicate? + bool isNonTruncStore() const; + /// Is this predicate the predefined truncating store predicate? + bool isTruncStore() const; + + /// Is this predicate the predefined monotonic atomic predicate? + bool isAtomicOrderingMonotonic() const; + /// Is this predicate the predefined acquire atomic predicate? + bool isAtomicOrderingAcquire() const; + /// Is this predicate the predefined release atomic predicate? + bool isAtomicOrderingRelease() const; + /// Is this predicate the predefined acquire-release atomic predicate? + bool isAtomicOrderingAcquireRelease() const; + /// Is this predicate the predefined sequentially consistent atomic predicate? + bool isAtomicOrderingSequentiallyConsistent() const; + + /// Is this predicate the predefined acquire-or-stronger atomic predicate? + bool isAtomicOrderingAcquireOrStronger() const; + /// Is this predicate the predefined weaker-than-acquire atomic predicate? + bool isAtomicOrderingWeakerThanAcquire() const; + + /// Is this predicate the predefined release-or-stronger atomic predicate? + bool isAtomicOrderingReleaseOrStronger() const; + /// Is this predicate the predefined weaker-than-release atomic predicate? + bool isAtomicOrderingWeakerThanRelease() const; + + /// If non-null, indicates that this predicate is a predefined memory VT + /// predicate for a load/store and returns the ValueType record for the memory VT. + Record *getMemoryVT() const; + /// If non-null, indicates that this predicate is a predefined memory VT + /// predicate (checking only the scalar type) for load/store and returns the + /// ValueType record for the memory VT. + Record *getScalarMemoryVT() const; + private: + bool hasPredCode() const; + bool hasImmCode() const; std::string getPredCode() const; std::string getImmCode() const; + bool immCodeUsesAPInt() const; + bool immCodeUsesAPFloat() const; + + bool isPredefinedPredicateEqualTo(StringRef Field, bool Value) const; }; - + /// FIXME: TreePatternNode's can be shared in some cases (due to dag-shaped /// patterns), and as such should be ref counted. We currently just leak all @@ -324,7 +553,7 @@ class TreePatternNode { /// The type of each node result. Before and during type inference, each /// result may be a set of possible types. After (successful) type inference, /// each is a single concrete type. - SmallVector<EEVT::TypeSet, 1> Types; + std::vector<TypeSetByHwMode> Types; /// Operator - The Record for the operator if this is an interior node (not /// a leaf). @@ -367,22 +596,24 @@ public: // Type accessors. 
unsigned getNumTypes() const { return Types.size(); } - MVT::SimpleValueType getType(unsigned ResNo) const { - return Types[ResNo].getConcrete(); + ValueTypeByHwMode getType(unsigned ResNo) const { + return Types[ResNo].getValueTypeByHwMode(); } - const SmallVectorImpl<EEVT::TypeSet> &getExtTypes() const { return Types; } - const EEVT::TypeSet &getExtType(unsigned ResNo) const { return Types[ResNo]; } - EEVT::TypeSet &getExtType(unsigned ResNo) { return Types[ResNo]; } - void setType(unsigned ResNo, const EEVT::TypeSet &T) { Types[ResNo] = T; } - - bool hasTypeSet(unsigned ResNo) const { - return Types[ResNo].isConcrete(); + const std::vector<TypeSetByHwMode> &getExtTypes() const { return Types; } + const TypeSetByHwMode &getExtType(unsigned ResNo) const { + return Types[ResNo]; + } + TypeSetByHwMode &getExtType(unsigned ResNo) { return Types[ResNo]; } + void setType(unsigned ResNo, const TypeSetByHwMode &T) { Types[ResNo] = T; } + MVT::SimpleValueType getSimpleType(unsigned ResNo) const { + return Types[ResNo].getMachineValueType().SimpleTy; } - bool isTypeCompletelyUnknown(unsigned ResNo) const { - return Types[ResNo].isCompletelyUnknown(); + + bool hasConcreteType(unsigned ResNo) const { + return Types[ResNo].isValueTypeByHwMode(false); } - bool isTypeDynamicallyResolved(unsigned ResNo) const { - return Types[ResNo].isDynamicallyResolved(); + bool isTypeCompletelyUnknown(unsigned ResNo, TreePattern &TP) const { + return Types[ResNo].empty(); } Init *getLeafValue() const { assert(isLeaf()); return Val; } @@ -401,8 +632,12 @@ public: return false; } + bool hasProperTypeByHwMode() const; + bool hasPossibleType() const; + bool setDefaultMode(unsigned Mode); + bool hasAnyPredicate() const { return !PredicateFns.empty(); } - + const std::vector<TreePredicateFn> &getPredicateFns() const { return PredicateFns; } @@ -484,15 +719,12 @@ public: // Higher level manipulation routines. /// information. If N already contains a conflicting type, then flag an /// error. This returns true if any information was updated. /// - bool UpdateNodeType(unsigned ResNo, const EEVT::TypeSet &InTy, - TreePattern &TP) { - return Types[ResNo].MergeInTypeInfo(InTy, TP); - } - + bool UpdateNodeType(unsigned ResNo, const TypeSetByHwMode &InTy, + TreePattern &TP); bool UpdateNodeType(unsigned ResNo, MVT::SimpleValueType InTy, - TreePattern &TP) { - return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP); - } + TreePattern &TP); + bool UpdateNodeType(unsigned ResNo, ValueTypeByHwMode InTy, + TreePattern &TP); // Update node type with types inferred from an instruction operand or result // def from the ins/outs lists. @@ -501,14 +733,7 @@ public: // Higher level manipulation routines. /// ContainsUnresolvedType - Return true if this tree contains any /// unresolved types. - bool ContainsUnresolvedType() const { - for (unsigned i = 0, e = Types.size(); i != e; ++i) - if (!Types[i].isConcrete()) return true; - - for (unsigned i = 0, e = getNumChildren(); i != e; ++i) - if (getChild(i)->ContainsUnresolvedType()) return true; - return false; - } + bool ContainsUnresolvedType(TreePattern &TP) const; /// canPatternMatch - If it is impossible for this pattern to match on this /// target, fill in Reason and return false. Otherwise, return true. @@ -560,6 +785,9 @@ class TreePattern { /// number for each operand encountered in a ComplexPattern to aid in that /// check. 
StringMap<std::pair<Record *, unsigned>> ComplexPatternOperands; + + TypeInfer Infer; + public: /// TreePattern constructor - Parse the specified DagInits into the @@ -576,6 +804,7 @@ public: const std::vector<TreePatternNode*> &getTrees() const { return Trees; } unsigned getNumTrees() const { return Trees.size(); } TreePatternNode *getTree(unsigned i) const { return Trees[i]; } + void setTree(unsigned i, TreePatternNode *Tree) { Trees[i] = Tree; } TreePatternNode *getOnlyTree() const { assert(Trees.size() == 1 && "Doesn't have exactly one pattern!"); return Trees[0]; @@ -625,6 +854,8 @@ public: HasError = false; } + TypeInfer &getInfer() { return Infer; } + void print(raw_ostream &OS) const; void dump() const; @@ -634,6 +865,32 @@ private: void ComputeNamedNodes(TreePatternNode *N); }; + +inline bool TreePatternNode::UpdateNodeType(unsigned ResNo, + const TypeSetByHwMode &InTy, + TreePattern &TP) { + TypeSetByHwMode VTS(InTy); + TP.getInfer().expandOverloads(VTS); + return TP.getInfer().MergeInTypeInfo(Types[ResNo], VTS); +} + +inline bool TreePatternNode::UpdateNodeType(unsigned ResNo, + MVT::SimpleValueType InTy, + TreePattern &TP) { + TypeSetByHwMode VTS(InTy); + TP.getInfer().expandOverloads(VTS); + return TP.getInfer().MergeInTypeInfo(Types[ResNo], VTS); +} + +inline bool TreePatternNode::UpdateNodeType(unsigned ResNo, + ValueTypeByHwMode InTy, + TreePattern &TP) { + TypeSetByHwMode VTS(InTy); + TP.getInfer().expandOverloads(VTS); + return TP.getInfer().MergeInTypeInfo(Types[ResNo], VTS); +} + + /// DAGDefaultOperand - One of these is created for each OperandWithDefaultOps /// that has a set ExecuteAlways / DefaultOps field. struct DAGDefaultOperand { @@ -680,31 +937,89 @@ public: TreePatternNode *getResultPattern() const { return ResultPattern; } }; +/// This class represents a condition that has to be satisfied for a pattern +/// to be tried. It is a generalization of a class "Pattern" from Target.td: +/// in addition to the Target.td's predicates, this class can also represent +/// conditions associated with HW modes. Both types will eventually become +/// strings containing C++ code to be executed, the difference is in how +/// these strings are generated. +class Predicate { +public: + Predicate(Record *R, bool C = true) : Def(R), IfCond(C), IsHwMode(false) { + assert(R->isSubClassOf("Predicate") && + "Predicate objects should only be created for records derived" + "from Predicate class"); + } + Predicate(StringRef FS, bool C = true) : Def(nullptr), Features(FS.str()), + IfCond(C), IsHwMode(true) {} + + /// Return a string which contains the C++ condition code that will serve + /// as a predicate during instruction selection. + std::string getCondString() const { + // The string will excute in a subclass of SelectionDAGISel. + // Cast to std::string explicitly to avoid ambiguity with StringRef. + std::string C = IsHwMode + ? std::string("MF->getSubtarget().checkFeatures(\"" + Features + "\")") + : std::string(Def->getValueAsString("CondString")); + return IfCond ? 
C : "!("+C+')'; + } + bool operator==(const Predicate &P) const { + return IfCond == P.IfCond && IsHwMode == P.IsHwMode && Def == P.Def; + } + bool operator<(const Predicate &P) const { + if (IsHwMode != P.IsHwMode) + return IsHwMode < P.IsHwMode; + assert(!Def == !P.Def && "Inconsistency between Def and IsHwMode"); + if (IfCond != P.IfCond) + return IfCond < P.IfCond; + if (Def) + return LessRecord()(Def, P.Def); + return Features < P.Features; + } + Record *Def; ///< Predicate definition from .td file, null for + ///< HW modes. + std::string Features; ///< Feature string for HW mode. + bool IfCond; ///< The boolean value that the condition has to + ///< evaluate to for this predicate to be true. + bool IsHwMode; ///< Does this predicate correspond to a HW mode? +}; + /// PatternToMatch - Used by CodeGenDAGPatterns to keep tab of patterns /// processed to produce isel. class PatternToMatch { public: - PatternToMatch(Record *srcrecord, ListInit *preds, TreePatternNode *src, - TreePatternNode *dst, std::vector<Record *> dstregs, - int complexity, unsigned uid) - : SrcRecord(srcrecord), Predicates(preds), SrcPattern(src), - DstPattern(dst), Dstregs(std::move(dstregs)), - AddedComplexity(complexity), ID(uid) {} + PatternToMatch(Record *srcrecord, const std::vector<Predicate> &preds, + TreePatternNode *src, TreePatternNode *dst, + const std::vector<Record*> &dstregs, + int complexity, unsigned uid, unsigned setmode = 0) + : SrcRecord(srcrecord), SrcPattern(src), DstPattern(dst), + Predicates(preds), Dstregs(std::move(dstregs)), + AddedComplexity(complexity), ID(uid), ForceMode(setmode) {} + + PatternToMatch(Record *srcrecord, std::vector<Predicate> &&preds, + TreePatternNode *src, TreePatternNode *dst, + std::vector<Record*> &&dstregs, + int complexity, unsigned uid, unsigned setmode = 0) + : SrcRecord(srcrecord), SrcPattern(src), DstPattern(dst), + Predicates(preds), Dstregs(std::move(dstregs)), + AddedComplexity(complexity), ID(uid), ForceMode(setmode) {} Record *SrcRecord; // Originating Record for the pattern. - ListInit *Predicates; // Top level predicate conditions to match. TreePatternNode *SrcPattern; // Source pattern to match. TreePatternNode *DstPattern; // Resulting pattern. + std::vector<Predicate> Predicates; // Top level predicate conditions + // to match. std::vector<Record*> Dstregs; // Physical register defs being matched. int AddedComplexity; // Add to matching pattern complexity. unsigned ID; // Unique ID for the record. + unsigned ForceMode; // Force this mode in type inference when set. 
Record *getSrcRecord() const { return SrcRecord; } - ListInit *getPredicates() const { return Predicates; } TreePatternNode *getSrcPattern() const { return SrcPattern; } TreePatternNode *getDstPattern() const { return DstPattern; } const std::vector<Record*> &getDstRegs() const { return Dstregs; } int getAddedComplexity() const { return AddedComplexity; } + const std::vector<Predicate> &getPredicates() const { return Predicates; } std::string getPredicateCheck() const; @@ -720,7 +1035,8 @@ class CodeGenDAGPatterns { CodeGenIntrinsicTable TgtIntrinsics; std::map<Record*, SDNodeInfo, LessRecordByID> SDNodes; - std::map<Record*, std::pair<Record*, std::string>, LessRecordByID> SDNodeXForms; + std::map<Record*, std::pair<Record*, std::string>, LessRecordByID> + SDNodeXForms; std::map<Record*, ComplexPattern, LessRecordByID> ComplexPatterns; std::map<Record *, std::unique_ptr<TreePattern>, LessRecordByID> PatternFragments; @@ -735,24 +1051,34 @@ class CodeGenDAGPatterns { /// value is the pattern to match, the second pattern is the result to /// emit. std::vector<PatternToMatch> PatternsToMatch; + + TypeSetByHwMode LegalVTS; + + using PatternRewriterFn = std::function<void (TreePattern *)>; + PatternRewriterFn PatternRewriter; + public: - CodeGenDAGPatterns(RecordKeeper &R); + CodeGenDAGPatterns(RecordKeeper &R, + PatternRewriterFn PatternRewriter = nullptr); CodeGenTarget &getTargetInfo() { return Target; } const CodeGenTarget &getTargetInfo() const { return Target; } + const TypeSetByHwMode &getLegalTypes() const { return LegalVTS; } Record *getSDNodeNamed(const std::string &Name) const; const SDNodeInfo &getSDNodeInfo(Record *R) const { - assert(SDNodes.count(R) && "Unknown node!"); - return SDNodes.find(R)->second; + auto F = SDNodes.find(R); + assert(F != SDNodes.end() && "Unknown node!"); + return F->second; } // Node transformation lookups. typedef std::pair<Record*, std::string> NodeXForm; const NodeXForm &getSDNodeTransform(Record *R) const { - assert(SDNodeXForms.count(R) && "Invalid transform!"); - return SDNodeXForms.find(R)->second; + auto F = SDNodeXForms.find(R); + assert(F != SDNodeXForms.end() && "Invalid transform!"); + return F->second; } typedef std::map<Record*, NodeXForm, LessRecordByID>::const_iterator @@ -762,8 +1088,9 @@ public: const ComplexPattern &getComplexPattern(Record *R) const { - assert(ComplexPatterns.count(R) && "Unknown addressing mode!"); - return ComplexPatterns.find(R)->second; + auto F = ComplexPatterns.find(R); + assert(F != ComplexPatterns.end() && "Unknown addressing mode!"); + return F->second; } const CodeGenIntrinsic &getIntrinsic(Record *R) const { @@ -791,19 +1118,22 @@ public: } const DAGDefaultOperand &getDefaultOperand(Record *R) const { - assert(DefaultOperands.count(R) &&"Isn't an analyzed default operand!"); - return DefaultOperands.find(R)->second; + auto F = DefaultOperands.find(R); + assert(F != DefaultOperands.end() &&"Isn't an analyzed default operand!"); + return F->second; } // Pattern Fragment information. 
TreePattern *getPatternFragment(Record *R) const { - assert(PatternFragments.count(R) && "Invalid pattern fragment request!"); - return PatternFragments.find(R)->second.get(); + auto F = PatternFragments.find(R); + assert(F != PatternFragments.end() && "Invalid pattern fragment request!"); + return F->second.get(); } TreePattern *getPatternFragmentIfRead(Record *R) const { - if (!PatternFragments.count(R)) + auto F = PatternFragments.find(R); + if (F == PatternFragments.end()) return nullptr; - return PatternFragments.find(R)->second.get(); + return F->second.get(); } typedef std::map<Record *, std::unique_ptr<TreePattern>, @@ -825,8 +1155,9 @@ public: DAGInstMap &DAGInsts); const DAGInstruction &getInstruction(Record *R) const { - assert(Instructions.count(R) && "Unknown instruction!"); - return Instructions.find(R)->second; + auto F = Instructions.find(R); + assert(F != Instructions.end() && "Unknown instruction!"); + return F->second; } Record *get_intrinsic_void_sdnode() const { @@ -849,10 +1180,13 @@ private: void ParseDefaultOperands(); void ParseInstructions(); void ParsePatterns(); + void ExpandHwModeBasedTypes(); void InferInstructionFlags(); void GenerateVariants(); void VerifyInstructionFlags(); + std::vector<Predicate> makePredList(ListInit *L); + void AddPatternToMatch(TreePattern *Pattern, PatternToMatch &&PTM); void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, std::map<std::string, @@ -861,6 +1195,15 @@ private: TreePatternNode*> &InstResults, std::vector<Record*> &InstImpResults); }; + + +inline bool SDNodeInfo::ApplyTypeConstraints(TreePatternNode *N, + TreePattern &TP) const { + bool MadeChange = false; + for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i) + MadeChange |= TypeConstraints[i].ApplyTypeConstraint(N, *this, TP); + return MadeChange; + } } // end namespace llvm #endif diff --git a/utils/TableGen/CodeGenHwModes.cpp b/utils/TableGen/CodeGenHwModes.cpp new file mode 100644 index 000000000000..9f88d95275b4 --- /dev/null +++ b/utils/TableGen/CodeGenHwModes.cpp @@ -0,0 +1,114 @@ +//===--- CodeGenHwModes.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// Classes to parse and store HW mode information for instruction selection +//===----------------------------------------------------------------------===// + +#include "CodeGenHwModes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" + +using namespace llvm; + +StringRef CodeGenHwModes::DefaultModeName = "DefaultMode"; + +HwMode::HwMode(Record *R) { + Name = R->getName(); + Features = R->getValueAsString("Features"); +} + +LLVM_DUMP_METHOD +void HwMode::dump() const { + dbgs() << Name << ": " << Features << '\n'; +} + +HwModeSelect::HwModeSelect(Record *R, CodeGenHwModes &CGH) { + std::vector<Record*> Modes = R->getValueAsListOfDefs("Modes"); + std::vector<Record*> Objects = R->getValueAsListOfDefs("Objects"); + if (Modes.size() != Objects.size()) { + PrintError(R->getLoc(), "in record " + R->getName() + + " derived from HwModeSelect: the lists Modes and Objects should " + "have the same size"); + report_fatal_error("error in target description."); + } + for (unsigned i = 0, e = Modes.size(); i != e; ++i) { + unsigned ModeId = CGH.getHwModeId(Modes[i]->getName()); + Items.push_back(std::make_pair(ModeId, Objects[i])); + } +} + +LLVM_DUMP_METHOD +void HwModeSelect::dump() const { + dbgs() << '{'; + for (const PairType &P : Items) + dbgs() << " (" << P.first << ',' << P.second->getName() << ')'; + dbgs() << " }\n"; +} + +CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) { + std::vector<Record*> MRs = Records.getAllDerivedDefinitions("HwMode"); + // The default mode needs a definition in the .td sources for TableGen + // to accept references to it. We need to ignore the definition here. 
+ for (auto I = MRs.begin(), E = MRs.end(); I != E; ++I) { + if ((*I)->getName() != DefaultModeName) + continue; + MRs.erase(I); + break; + } + + for (Record *R : MRs) { + Modes.emplace_back(R); + unsigned NewId = Modes.size(); + ModeIds.insert(std::make_pair(Modes[NewId-1].Name, NewId)); + } + + std::vector<Record*> MSs = Records.getAllDerivedDefinitions("HwModeSelect"); + for (Record *R : MSs) { + auto P = ModeSelects.emplace(std::make_pair(R, HwModeSelect(R, *this))); + assert(P.second); + (void)P; + } +} + +unsigned CodeGenHwModes::getHwModeId(StringRef Name) const { + if (Name == DefaultModeName) + return DefaultMode; + auto F = ModeIds.find(Name); + assert(F != ModeIds.end() && "Unknown mode name"); + return F->second; +} + +const HwModeSelect &CodeGenHwModes::getHwModeSelect(Record *R) const { + auto F = ModeSelects.find(R); + assert(F != ModeSelects.end() && "Record is not a \"mode select\""); + return F->second; +} + +LLVM_DUMP_METHOD +void CodeGenHwModes::dump() const { + dbgs() << "Modes: {\n"; + for (const HwMode &M : Modes) { + dbgs() << " "; + M.dump(); + } + dbgs() << "}\n"; + + dbgs() << "ModeIds: {\n"; + for (const auto &P : ModeIds) + dbgs() << " " << P.first() << " -> " << P.second << '\n'; + dbgs() << "}\n"; + + dbgs() << "ModeSelects: {\n"; + for (const auto &P : ModeSelects) { + dbgs() << " " << P.first->getName() << " -> "; + P.second.dump(); + } + dbgs() << "}\n"; +} diff --git a/utils/TableGen/CodeGenHwModes.h b/utils/TableGen/CodeGenHwModes.h new file mode 100644 index 000000000000..36df835d1933 --- /dev/null +++ b/utils/TableGen/CodeGenHwModes.h @@ -0,0 +1,64 @@ +//===--- CodeGenHwModes.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Classes to parse and store HW mode information for instruction selection. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H +#define LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H + +#include "llvm/ADT/StringMap.h" +#include <map> +#include <string> +#include <vector> + +// HwModeId -> list of predicates (definition) + +namespace llvm { + class Record; + class RecordKeeper; + + struct CodeGenHwModes; + + struct HwMode { + HwMode(Record *R); + StringRef Name; + std::string Features; + void dump() const; + }; + + struct HwModeSelect { + HwModeSelect(Record *R, CodeGenHwModes &CGH); + typedef std::pair<unsigned, Record*> PairType; + std::vector<PairType> Items; + void dump() const; + }; + + struct CodeGenHwModes { + enum : unsigned { DefaultMode = 0 }; + static StringRef DefaultModeName; + + CodeGenHwModes(RecordKeeper &R); + unsigned getHwModeId(StringRef Name) const; + const HwMode &getMode(unsigned Id) const { + assert(Id != 0 && "Mode id of 0 is reserved for the default mode"); + return Modes[Id-1]; + } + const HwModeSelect &getHwModeSelect(Record *R) const; + unsigned getNumModeIds() const { return Modes.size()+1; } + void dump() const; + + private: + RecordKeeper &Records; + StringMap<unsigned> ModeIds; // HwMode (string) -> HwModeId + std::vector<HwMode> Modes; + std::map<Record*,HwModeSelect> ModeSelects; + }; +} + +#endif // LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index f4a760990999..44ee16f6fd74 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -128,8 +128,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { // Make sure the constraints list for each operand is large enough to hold // constraint info, even if none is present. - for (unsigned i = 0, e = OperandList.size(); i != e; ++i) - OperandList[i].Constraints.resize(OperandList[i].MINumOperands); + for (OperandInfo &OpInfo : OperandList) + OpInfo.Constraints.resize(OpInfo.MINumOperands); } @@ -375,10 +375,10 @@ HasOneImplicitDefWithKnownVT(const CodeGenTarget &TargetInfo) const { // Check to see if the first implicit def has a resolvable type. Record *FirstImplicitDef = ImplicitDefs[0]; assert(FirstImplicitDef->isSubClassOf("Register")); - const std::vector<MVT::SimpleValueType> &RegVTs = + const std::vector<ValueTypeByHwMode> &RegVTs = TargetInfo.getRegisterVTs(FirstImplicitDef); - if (RegVTs.size() == 1) - return RegVTs[0]; + if (RegVTs.size() == 1 && RegVTs[0].isSimple()) + return RegVTs[0].getSimple().SimpleTy; return MVT::Other; } @@ -430,6 +430,17 @@ FlattenAsmStringVariants(StringRef Cur, unsigned Variant) { return Res; } +bool CodeGenInstruction::isOperandAPointer(unsigned i) const { + if (DagInit *ConstraintList = TheDef->getValueAsDag("InOperandList")) { + if (i < ConstraintList->getNumArgs()) { + if (DefInit *Constraint = dyn_cast<DefInit>(ConstraintList->getArg(i))) { + return Constraint->getDef()->isSubClassOf("TypedOperand") && + Constraint->getDef()->getValueAsBit("IsPointer"); + } + } + } + return false; +} //===----------------------------------------------------------------------===// /// CodeGenInstAlias Implementation diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index e173e153879c..9cff95b1247f 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -284,6 +284,12 @@ template <typename T> class ArrayRef; /// include text from the specified variant, returning the new string. 
static std::string FlattenAsmStringVariants(StringRef AsmString, unsigned Variant); + + // Is the specified operand in a generic instruction implicitly a pointer. + // This can be used on intructions that use typeN or ptypeN to identify + // operands that should be considered as pointers even though SelectionDAG + // didn't make a distinction between integer and pointers. + bool isOperandAPointer(unsigned i) const; }; diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 6399fb5ec1dd..a6b0a4beb8ea 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -36,6 +36,7 @@ #include <cstdint> #include <iterator> #include <map> +#include <queue> #include <set> #include <string> #include <tuple> @@ -96,9 +97,9 @@ void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { PrintFatalError(TheDef->getLoc(), "CoveredBySubRegs must have two or more entries"); SmallVector<CodeGenSubRegIndex*, 8> IdxParts; - for (unsigned i = 0, e = Parts.size(); i != e; ++i) - IdxParts.push_back(RegBank.getSubRegIdx(Parts[i])); - RegBank.addConcatSubRegIndex(IdxParts, this); + for (Record *Part : Parts) + IdxParts.push_back(RegBank.getSubRegIdx(Part)); + setConcatenationOf(IdxParts); } } @@ -119,6 +120,36 @@ LaneBitmask CodeGenSubRegIndex::computeLaneMask() const { return LaneMask; } +void CodeGenSubRegIndex::setConcatenationOf( + ArrayRef<CodeGenSubRegIndex*> Parts) { + if (ConcatenationOf.empty()) + ConcatenationOf.assign(Parts.begin(), Parts.end()); + else + assert(std::equal(Parts.begin(), Parts.end(), + ConcatenationOf.begin()) && "parts consistent"); +} + +void CodeGenSubRegIndex::computeConcatTransitiveClosure() { + for (SmallVectorImpl<CodeGenSubRegIndex*>::iterator + I = ConcatenationOf.begin(); I != ConcatenationOf.end(); /*empty*/) { + CodeGenSubRegIndex *SubIdx = *I; + SubIdx->computeConcatTransitiveClosure(); +#ifndef NDEBUG + for (CodeGenSubRegIndex *SRI : SubIdx->ConcatenationOf) + assert(SRI->ConcatenationOf.empty() && "No transitive closure?"); +#endif + + if (SubIdx->ConcatenationOf.empty()) { + ++I; + } else { + I = ConcatenationOf.erase(I); + I = ConcatenationOf.insert(I, SubIdx->ConcatenationOf.begin(), + SubIdx->ConcatenationOf.end()); + I += SubIdx->ConcatenationOf.size(); + } + } +} + //===----------------------------------------------------------------------===// // CodeGenRegister //===----------------------------------------------------------------------===// @@ -158,8 +189,8 @@ void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) { // Add ad hoc alias links. This is a symmetric relationship between two // registers, so build a symmetric graph by adding links in both ends. std::vector<Record*> Aliases = TheDef->getValueAsListOfDefs("Aliases"); - for (unsigned i = 0, e = Aliases.size(); i != e; ++i) { - CodeGenRegister *Reg = RegBank.getReg(Aliases[i]); + for (Record *Alias : Aliases) { + CodeGenRegister *Reg = RegBank.getReg(Alias); ExplicitAliases.push_back(Reg); Reg->ExplicitAliases.push_back(this); } @@ -223,9 +254,8 @@ static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) { // Return true if the RegUnits changed. bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { bool changed = false; - for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); - I != E; ++I) { - CodeGenRegister *SR = I->second; + for (const auto &SubReg : SubRegs) { + CodeGenRegister *SR = SubReg.second; // Merge the subregister's units into this register's RegUnits. 
changed |= (RegUnits |= SR->RegUnits); } @@ -259,15 +289,13 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Clone inherited subregs and place duplicate entries in Orphans. // Here the order is important - earlier subregs take precedence. - for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { - CodeGenRegister *SR = ExplicitSubRegs[i]; - const SubRegMap &Map = SR->computeSubRegs(RegBank); - HasDisjunctSubRegs |= SR->HasDisjunctSubRegs; + for (CodeGenRegister *ESR : ExplicitSubRegs) { + const SubRegMap &Map = ESR->computeSubRegs(RegBank); + HasDisjunctSubRegs |= ESR->HasDisjunctSubRegs; - for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE; - ++SI) { - if (!SubRegs.insert(*SI).second) - Orphans.insert(SI->second); + for (const auto &SR : Map) { + if (!SubRegs.insert(SR).second) + Orphans.insert(SR.second); } } @@ -319,16 +347,14 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { CodeGenSubRegIndex *Idx = Indices.pop_back_val(); CodeGenRegister *SR = SubRegs[Idx]; const SubRegMap &Map = SR->computeSubRegs(RegBank); - for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE; - ++SI) - if (Orphans.erase(SI->second)) - SubRegs[RegBank.getCompositeSubRegIndex(Idx, SI->first)] = SI->second; + for (const auto &SubReg : Map) + if (Orphans.erase(SubReg.second)) + SubRegs[RegBank.getCompositeSubRegIndex(Idx, SubReg.first)] = SubReg.second; } // Compute the inverse SubReg -> Idx map. - for (SubRegMap::const_iterator SI = SubRegs.begin(), SE = SubRegs.end(); - SI != SE; ++SI) { - if (SI->second == this) { + for (const auto &SubReg : SubRegs) { + if (SubReg.second == this) { ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); @@ -338,20 +364,20 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Compute AllSuperRegsCovered. if (!CoveredBySubRegs) - SI->first->AllSuperRegsCovered = false; + SubReg.first->AllSuperRegsCovered = false; // Ensure that every sub-register has a unique name. DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins = - SubReg2Idx.insert(std::make_pair(SI->second, SI->first)).first; - if (Ins->second == SI->first) + SubReg2Idx.insert(std::make_pair(SubReg.second, SubReg.first)).first; + if (Ins->second == SubReg.first) continue; - // Trouble: Two different names for SI->second. + // Trouble: Two different names for SubReg.second. ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); PrintFatalError(Loc, "Sub-register can't have two names: " + - SI->second->getName() + " available as " + - SI->first->getName() + " and " + Ins->second->getName()); + SubReg.second->getName() + " available as " + + SubReg.first->getName() + " and " + Ins->second->getName()); } // Derive possible names for sub-register concatenations from any explicit @@ -369,7 +395,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { Parts.push_back(getSubRegIndex(SR->ExplicitSubRegs[j])); // Offer this as an existing spelling for the concatenation of Parts. - RegBank.addConcatSubRegIndex(Parts, ExplicitSubRegIndices[i]); + CodeGenSubRegIndex &Idx = *ExplicitSubRegIndices[i]; + Idx.setConcatenationOf(Parts); } // Initialize RegUnitList. Because getSubRegs is called recursively, this @@ -430,14 +457,21 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // sub-register relationships that would force a DAG. // void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { - // Collect new sub-registers first, add them later. 
SmallVector<SubRegMap::value_type, 8> NewSubRegs; + std::queue<std::pair<CodeGenSubRegIndex*,CodeGenRegister*>> SubRegQueue; + for (std::pair<CodeGenSubRegIndex*,CodeGenRegister*> P : SubRegs) + SubRegQueue.push(P); + // Look at the leading super-registers of each sub-register. Those are the // candidates for new sub-registers, assuming they are fully contained in // this register. - for (SubRegMap::iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I){ - const CodeGenRegister *SubReg = I->second; + while (!SubRegQueue.empty()) { + CodeGenSubRegIndex *SubRegIdx; + const CodeGenRegister *SubReg; + std::tie(SubRegIdx, SubReg) = SubRegQueue.front(); + SubRegQueue.pop(); + const CodeGenRegister::SuperRegList &Leads = SubReg->LeadingSuperRegs; for (unsigned i = 0, e = Leads.size(); i != e; ++i) { CodeGenRegister *Cand = const_cast<CodeGenRegister*>(Leads[i]); @@ -445,41 +479,47 @@ void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { if (Cand == this || getSubRegIndex(Cand)) continue; // Check if each component of Cand is already a sub-register. - // We know that the first component is I->second, and is present with the - // name I->first. - SmallVector<CodeGenSubRegIndex*, 8> Parts(1, I->first); assert(!Cand->ExplicitSubRegs.empty() && "Super-register has no sub-registers"); - for (unsigned j = 1, e = Cand->ExplicitSubRegs.size(); j != e; ++j) { - if (CodeGenSubRegIndex *Idx = getSubRegIndex(Cand->ExplicitSubRegs[j])) - Parts.push_back(Idx); - else { + if (Cand->ExplicitSubRegs.size() == 1) + continue; + SmallVector<CodeGenSubRegIndex*, 8> Parts; + // We know that the first component is (SubRegIdx,SubReg). However we + // may still need to split it into smaller subregister parts. + assert(Cand->ExplicitSubRegs[0] == SubReg && "LeadingSuperRegs correct"); + assert(getSubRegIndex(SubReg) == SubRegIdx && "LeadingSuperRegs correct"); + for (CodeGenRegister *SubReg : Cand->ExplicitSubRegs) { + if (CodeGenSubRegIndex *SubRegIdx = getSubRegIndex(SubReg)) { + if (SubRegIdx->ConcatenationOf.empty()) { + Parts.push_back(SubRegIdx); + } else + for (CodeGenSubRegIndex *SubIdx : SubRegIdx->ConcatenationOf) + Parts.push_back(SubIdx); + } else { // Sub-register doesn't exist. Parts.clear(); break; } } - // If some Cand sub-register is not part of this register, or if Cand only - // has one sub-register, there is nothing to do. - if (Parts.size() <= 1) + // There is nothing to do if some Cand sub-register is not part of this + // register. + if (Parts.empty()) continue; // Each part of Cand is a sub-register of this. Make the full Cand also // a sub-register with a concatenated sub-register index. - CodeGenSubRegIndex *Concat= RegBank.getConcatSubRegIndex(Parts); - NewSubRegs.push_back(std::make_pair(Concat, Cand)); - } - } + CodeGenSubRegIndex *Concat = RegBank.getConcatSubRegIndex(Parts); + std::pair<CodeGenSubRegIndex*,CodeGenRegister*> NewSubReg = + std::make_pair(Concat, Cand); - // Now add all the new sub-registers. - for (unsigned i = 0, e = NewSubRegs.size(); i != e; ++i) { - // Don't add Cand if another sub-register is already using the index. - if (!SubRegs.insert(NewSubRegs[i]).second) - continue; + if (!SubRegs.insert(NewSubReg).second) + continue; - CodeGenSubRegIndex *NewIdx = NewSubRegs[i].first; - CodeGenRegister *NewSubReg = NewSubRegs[i].second; - SubReg2Idx.insert(std::make_pair(NewSubReg, NewIdx)); + // We inserted a new subregister. 
+ NewSubRegs.push_back(NewSubReg); + SubRegQueue.push(NewSubReg); + SubReg2Idx.insert(std::make_pair(Cand, Concat)); + } } // Create sub-register index composition maps for the synthesized indices. @@ -686,7 +726,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) if (!Type->isSubClassOf("ValueType")) PrintFatalError("RegTypes list member '" + Type->getName() + "' does not derive from the ValueType class!"); - VTs.push_back(getValueType(Type)); + VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes())); } assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!"); @@ -719,12 +759,22 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) } } - // Allow targets to override the size in bits of the RegisterClass. + Namespace = R->getValueAsString("Namespace"); + + if (const RecordVal *RV = R->getValue("RegInfos")) + if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) + RSI = RegSizeInfoByHwMode(DI->getDef(), RegBank.getHwModes()); unsigned Size = R->getValueAsInt("Size"); + assert((RSI.hasDefault() || Size != 0 || VTs[0].isSimple()) && + "Impossible to determine register size"); + if (!RSI.hasDefault()) { + RegSizeInfo RI; + RI.RegSize = RI.SpillSize = Size ? Size + : VTs[0].getSimple().getSizeInBits(); + RI.SpillAlignment = R->getValueAsInt("Alignment"); + RSI.Map.insert({DefaultMode, RI}); + } - Namespace = R->getValueAsString("Namespace"); - SpillSize = Size ? Size : MVT(VTs[0]).getSizeInBits(); - SpillAlignment = R->getValueAsInt("Alignment"); CopyCost = R->getValueAsInt("CopyCost"); Allocatable = R->getValueAsBit("isAllocatable"); AltOrderSelect = R->getValueAsString("AltOrderSelect"); @@ -744,8 +794,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Name(Name), TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), - SpillSize(Props.SpillSize), - SpillAlignment(Props.SpillAlignment), + RSI(Props.RSI), CopyCost(0), Allocatable(true), AllocationPriority(0) { @@ -787,7 +836,7 @@ bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) { - OS << "{ S=" << K.SpillSize << ", A=" << K.SpillAlignment; + OS << "{ " << K.RSI; for (const auto R : *K.Members) OS << ", " << R->getName(); return OS << " }"; @@ -800,8 +849,7 @@ namespace llvm { bool CodeGenRegisterClass::Key:: operator<(const CodeGenRegisterClass::Key &B) const { assert(Members && B.Members); - return std::tie(*Members, SpillSize, SpillAlignment) < - std::tie(*B.Members, B.SpillSize, B.SpillAlignment); + return std::tie(*Members, RSI) < std::tie(*B.Members, B.RSI); } // Returns true if RC is a strict subclass. @@ -815,8 +863,7 @@ operator<(const CodeGenRegisterClass::Key &B) const { // static bool testSubClass(const CodeGenRegisterClass *A, const CodeGenRegisterClass *B) { - return A->SpillAlignment && B->SpillAlignment % A->SpillAlignment == 0 && - A->SpillSize <= B->SpillSize && + return A->RSI.isSubClassOf(B->RSI) && std::includes(A->getMembers().begin(), A->getMembers().end(), B->getMembers().begin(), B->getMembers().end(), deref<llvm::less>()); @@ -835,16 +882,9 @@ static bool TopoOrderRC(const CodeGenRegisterClass &PA, if (A == B) return false; - // Order by ascending spill size. - if (A->SpillSize < B->SpillSize) - return true; - if (A->SpillSize > B->SpillSize) - return false; - - // Order by ascending spill alignment. 
- if (A->SpillAlignment < B->SpillAlignment) + if (A->RSI < B->RSI) return true; - if (A->SpillAlignment > B->SpillAlignment) + if (A->RSI != B->RSI) return false; // Order by descending set size. Note that the classes' allocation order may @@ -1017,7 +1057,8 @@ void CodeGenRegisterClass::buildRegUnitSet( // CodeGenRegBank //===----------------------------------------------------------------------===// -CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { +CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, + const CodeGenHwModes &Modes) : CGH(Modes) { // Configure register Sets to understand register classes and tuples. Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); @@ -1070,6 +1111,16 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { for (auto &Reg : Registers) Reg.computeSubRegs(*this); + // Compute transitive closure of subregister index ConcatenationOf vectors + // and initialize ConcatIdx map. + for (CodeGenSubRegIndex &SRI : SubRegIndices) { + SRI.computeConcatTransitiveClosure(); + if (!SRI.ConcatenationOf.empty()) + ConcatIdx.insert(std::make_pair( + SmallVector<CodeGenSubRegIndex*,8>(SRI.ConcatenationOf.begin(), + SRI.ConcatenationOf.end()), &SRI)); + } + // Infer even more sub-registers by combining leading super-registers. for (auto &Reg : Registers) if (Reg.CoveredBySubRegs) @@ -1147,7 +1198,7 @@ CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC, const CodeGenRegister::Vec *Members, StringRef Name) { // Synthetic sub-class has the same size and alignment as RC. - CodeGenRegisterClass::Key K(Members, RC->SpillSize, RC->SpillAlignment); + CodeGenRegisterClass::Key K(Members, RC->RSI); RCKeyMap::const_iterator FoundI = Key2RC.find(K); if (FoundI != Key2RC.end()) return FoundI->second; @@ -1183,6 +1234,11 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A, CodeGenSubRegIndex *CodeGenRegBank:: getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) { assert(Parts.size() > 1 && "Need two parts to concatenate"); +#ifndef NDEBUG + for (CodeGenSubRegIndex *Idx : Parts) { + assert(Idx->ConcatenationOf.empty() && "No transitive closure?"); + } +#endif // Look for an existing entry. CodeGenSubRegIndex *&Idx = ConcatIdx[Parts]; @@ -1208,6 +1264,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) { Idx = createSubRegIndex(Name, Parts.front()->getNamespace()); Idx->Size = Size; Idx->Offset = isContinuous ? Parts.front()->Offset : -1; + Idx->ConcatenationOf.assign(Parts.begin(), Parts.end()); return Idx; } @@ -1295,9 +1352,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() { // Moving from a class with no subregisters we just had a single lane: // The subregister must be a leaf subregister and only occupies 1 bit. // Move the bit from the class without subregisters into that position. - static_assert(sizeof(Idx.LaneMask.getAsInteger()) == 4, - "Change Log2_32 to a proper one"); - unsigned DstBit = Log2_32(Idx.LaneMask.getAsInteger()); + unsigned DstBit = Idx.LaneMask.getHighestLane(); assert(Idx.LaneMask == LaneBitmask::getLane(DstBit) && "Must be a leaf subregister"); MaskRolPair MaskRol = { LaneBitmask::getLane(0), (uint8_t)DstBit }; @@ -1328,9 +1383,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() { assert(Composite->getComposites().empty()); // Create Mask+Rotate operation and merge with existing ops if possible. 
- static_assert(sizeof(Composite->LaneMask.getAsInteger()) == 4, - "Change Log2_32 to a proper one"); - unsigned DstBit = Log2_32(Composite->LaneMask.getAsInteger()); + unsigned DstBit = Composite->LaneMask.getHighestLane(); int Shift = DstBit - SrcBit; uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift : LaneBitmask::BitWidth + Shift; @@ -1993,10 +2046,8 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { continue; // If RC1 and RC2 have different spill sizes or alignments, use the - // larger size for sub-classing. If they are equal, prefer RC1. - if (RC2->SpillSize > RC1->SpillSize || - (RC2->SpillSize == RC1->SpillSize && - RC2->SpillAlignment > RC1->SpillAlignment)) + // stricter one for sub-classing. If they are equal, prefer RC1. + if (RC2->RSI.hasStricterSpillThan(RC1->RSI)) std::swap(RC1, RC2); getOrCreateSubClass(RC1, &Intersection, diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index d0f96a035ea1..f2f1e6971af9 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -15,6 +15,7 @@ #ifndef LLVM_UTILS_TABLEGEN_CODEGENREGISTERS_H #define LLVM_UTILS_TABLEGEN_CODEGENREGISTERS_H +#include "InfoByHwMode.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -72,6 +73,10 @@ namespace llvm { mutable LaneBitmask LaneMask; mutable SmallVector<MaskRolPair,1> CompositionLaneMaskTransform; + /// A list of subregister indexes concatenated resulting in this + /// subregister index. This is the reverse of CodeGenRegBank::ConcatIdx. + SmallVector<CodeGenSubRegIndex*,4> ConcatenationOf; + // Are all super-registers containing this SubRegIndex covered by their // sub-registers? bool AllSuperRegsCovered; @@ -123,6 +128,12 @@ namespace llvm { // Compute LaneMask from Composed. Return LaneMask. LaneBitmask computeLaneMask() const; + void setConcatenationOf(ArrayRef<CodeGenSubRegIndex*> Parts); + + /// Replaces subregister indexes in the `ConcatenationOf` list with + /// list of subregisters they are composed of (if any). Do this recursively. + void computeConcatTransitiveClosure(); + private: CompMap Composed; }; @@ -309,9 +320,8 @@ namespace llvm { public: unsigned EnumValue; StringRef Namespace; - SmallVector<MVT::SimpleValueType, 4> VTs; - unsigned SpillSize; - unsigned SpillAlignment; + SmallVector<ValueTypeByHwMode, 4> VTs; + RegSizeInfoByHwMode RSI; int CopyCost; bool Allocatable; StringRef AltOrderSelect; @@ -328,13 +338,10 @@ namespace llvm { const std::string &getName() const { return Name; } std::string getQualifiedName() const; - ArrayRef<MVT::SimpleValueType> getValueTypes() const {return VTs;} - bool hasValueType(MVT::SimpleValueType VT) const { - return std::find(VTs.begin(), VTs.end(), VT) != VTs.end(); - } + ArrayRef<ValueTypeByHwMode> getValueTypes() const { return VTs; } unsigned getNumValueTypes() const { return VTs.size(); } - MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const { + ValueTypeByHwMode getValueTypeNum(unsigned VTNum) const { if (VTNum < VTs.size()) return VTs[VTNum]; llvm_unreachable("VTNum greater than number of ValueTypes in RegClass!"); @@ -429,18 +436,15 @@ namespace llvm { // the topological order used for the EnumValues. 
struct Key { const CodeGenRegister::Vec *Members; - unsigned SpillSize; - unsigned SpillAlignment; + RegSizeInfoByHwMode RSI; - Key(const CodeGenRegister::Vec *M, unsigned S = 0, unsigned A = 0) - : Members(M), SpillSize(S), SpillAlignment(A) {} + Key(const CodeGenRegister::Vec *M, const RegSizeInfoByHwMode &I) + : Members(M), RSI(I) {} Key(const CodeGenRegisterClass &RC) - : Members(&RC.getMembers()), - SpillSize(RC.SpillSize), - SpillAlignment(RC.SpillAlignment) {} + : Members(&RC.getMembers()), RSI(RC.RSI) {} - // Lexicographical order of (Members, SpillSize, SpillAlignment). + // Lexicographical order of (Members, RegSizeInfoByHwMode). bool operator<(const Key&) const; }; @@ -503,6 +507,8 @@ namespace llvm { class CodeGenRegBank { SetTheory Sets; + const CodeGenHwModes &CGH; + std::deque<CodeGenSubRegIndex> SubRegIndices; DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx; @@ -586,10 +592,12 @@ namespace llvm { void computeRegUnitLaneMasks(); public: - CodeGenRegBank(RecordKeeper&); + CodeGenRegBank(RecordKeeper&, const CodeGenHwModes&); SetTheory &getSets() { return Sets; } + const CodeGenHwModes &getHwModes() const { return CGH; } + // Sub-register indices. The first NumNamedIndices are defined by the user // in the .td files. The rest are synthesized such that all sub-registers // have a unique name. @@ -609,12 +617,6 @@ namespace llvm { CodeGenSubRegIndex * getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8>&); - void - addConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts, - CodeGenSubRegIndex *Idx) { - ConcatIdx.insert(std::make_pair(Parts, Idx)); - } - const std::deque<CodeGenRegister> &getRegisters() { return Registers; } const StringMap<CodeGenRegister*> &getRegistersByName() { diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index 50569b2ad989..b753e19a5443 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -64,9 +64,8 @@ struct InstRegexOp : public SetTheory::Operator { void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts, ArrayRef<SMLoc> Loc) override { SmallVector<Regex, 4> RegexList; - for (DagInit::const_arg_iterator - AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) { - StringInit *SI = dyn_cast<StringInit>(*AI); + for (Init *Arg : make_range(Expr->arg_begin(), Expr->arg_end())) { + StringInit *SI = dyn_cast<StringInit>(Arg); if (!SI) PrintFatalError(Loc, "instregex requires pattern string: " + Expr->getAsString()); @@ -162,8 +161,8 @@ void CodeGenSchedModels::collectProcModels() { // For each processor, find a unique machine model. DEBUG(dbgs() << "+++ PROCESSOR MODELs (addProcModel) +++\n"); - for (unsigned i = 0, N = ProcRecords.size(); i < N; ++i) - addProcModel(ProcRecords[i]); + for (Record *ProcRecord : ProcRecords) + addProcModel(ProcRecord); } /// Get a unique processor model based on the defined MachineModel and @@ -194,20 +193,20 @@ static void scanSchedRW(Record *RWDef, RecVec &RWDefs, if (!RWSet.insert(RWDef).second) return; RWDefs.push_back(RWDef); - // Reads don't current have sequence records, but it can be added later. + // Reads don't currently have sequence records, but it can be added later. 
if (RWDef->isSubClassOf("WriteSequence")) { RecVec Seq = RWDef->getValueAsListOfDefs("Writes"); - for (RecIter I = Seq.begin(), E = Seq.end(); I != E; ++I) - scanSchedRW(*I, RWDefs, RWSet); + for (Record *WSRec : Seq) + scanSchedRW(WSRec, RWDefs, RWSet); } else if (RWDef->isSubClassOf("SchedVariant")) { // Visit each variant (guarded by a different predicate). RecVec Vars = RWDef->getValueAsListOfDefs("Variants"); - for (RecIter VI = Vars.begin(), VE = Vars.end(); VI != VE; ++VI) { + for (Record *Variant : Vars) { // Visit each RW in the sequence selected by the current variant. - RecVec Selected = (*VI)->getValueAsListOfDefs("Selected"); - for (RecIter I = Selected.begin(), E = Selected.end(); I != E; ++I) - scanSchedRW(*I, RWDefs, RWSet); + RecVec Selected = Variant->getValueAsListOfDefs("Selected"); + for (Record *SelDef : Selected) + scanSchedRW(SelDef, RWDefs, RWSet); } } } @@ -228,42 +227,40 @@ void CodeGenSchedModels::collectSchedRW() { if (SchedDef->isValueUnset("SchedRW")) continue; RecVec RWs = SchedDef->getValueAsListOfDefs("SchedRW"); - for (RecIter RWI = RWs.begin(), RWE = RWs.end(); RWI != RWE; ++RWI) { - if ((*RWI)->isSubClassOf("SchedWrite")) - scanSchedRW(*RWI, SWDefs, RWSet); + for (Record *RW : RWs) { + if (RW->isSubClassOf("SchedWrite")) + scanSchedRW(RW, SWDefs, RWSet); else { - assert((*RWI)->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); - scanSchedRW(*RWI, SRDefs, RWSet); + assert(RW->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); + scanSchedRW(RW, SRDefs, RWSet); } } } // Find all ReadWrites referenced by InstRW. RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW"); - for (RecIter OI = InstRWDefs.begin(), OE = InstRWDefs.end(); OI != OE; ++OI) { + for (Record *InstRWDef : InstRWDefs) { // For all OperandReadWrites. - RecVec RWDefs = (*OI)->getValueAsListOfDefs("OperandReadWrites"); - for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); - RWI != RWE; ++RWI) { - if ((*RWI)->isSubClassOf("SchedWrite")) - scanSchedRW(*RWI, SWDefs, RWSet); + RecVec RWDefs = InstRWDef->getValueAsListOfDefs("OperandReadWrites"); + for (Record *RWDef : RWDefs) { + if (RWDef->isSubClassOf("SchedWrite")) + scanSchedRW(RWDef, SWDefs, RWSet); else { - assert((*RWI)->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); - scanSchedRW(*RWI, SRDefs, RWSet); + assert(RWDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); + scanSchedRW(RWDef, SRDefs, RWSet); } } } // Find all ReadWrites referenced by ItinRW. RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW"); - for (RecIter II = ItinRWDefs.begin(), IE = ItinRWDefs.end(); II != IE; ++II) { + for (Record *ItinRWDef : ItinRWDefs) { // For all OperandReadWrites. - RecVec RWDefs = (*II)->getValueAsListOfDefs("OperandReadWrites"); - for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); - RWI != RWE; ++RWI) { - if ((*RWI)->isSubClassOf("SchedWrite")) - scanSchedRW(*RWI, SWDefs, RWSet); + RecVec RWDefs = ItinRWDef->getValueAsListOfDefs("OperandReadWrites"); + for (Record *RWDef : RWDefs) { + if (RWDef->isSubClassOf("SchedWrite")) + scanSchedRW(RWDef, SWDefs, RWSet); else { - assert((*RWI)->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); - scanSchedRW(*RWI, SRDefs, RWSet); + assert(RWDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); + scanSchedRW(RWDef, SRDefs, RWSet); } } } @@ -271,50 +268,49 @@ void CodeGenSchedModels::collectSchedRW() { // for the loop below that initializes Alias vectors. 
RecVec AliasDefs = Records.getAllDerivedDefinitions("SchedAlias"); std::sort(AliasDefs.begin(), AliasDefs.end(), LessRecord()); - for (RecIter AI = AliasDefs.begin(), AE = AliasDefs.end(); AI != AE; ++AI) { - Record *MatchDef = (*AI)->getValueAsDef("MatchRW"); - Record *AliasDef = (*AI)->getValueAsDef("AliasRW"); + for (Record *ADef : AliasDefs) { + Record *MatchDef = ADef->getValueAsDef("MatchRW"); + Record *AliasDef = ADef->getValueAsDef("AliasRW"); if (MatchDef->isSubClassOf("SchedWrite")) { if (!AliasDef->isSubClassOf("SchedWrite")) - PrintFatalError((*AI)->getLoc(), "SchedWrite Alias must be SchedWrite"); + PrintFatalError(ADef->getLoc(), "SchedWrite Alias must be SchedWrite"); scanSchedRW(AliasDef, SWDefs, RWSet); } else { assert(MatchDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); if (!AliasDef->isSubClassOf("SchedRead")) - PrintFatalError((*AI)->getLoc(), "SchedRead Alias must be SchedRead"); + PrintFatalError(ADef->getLoc(), "SchedRead Alias must be SchedRead"); scanSchedRW(AliasDef, SRDefs, RWSet); } } // Sort and add the SchedReadWrites directly referenced by instructions or // itinerary resources. Index reads and writes in separate domains. std::sort(SWDefs.begin(), SWDefs.end(), LessRecord()); - for (RecIter SWI = SWDefs.begin(), SWE = SWDefs.end(); SWI != SWE; ++SWI) { - assert(!getSchedRWIdx(*SWI, /*IsRead=*/false) && "duplicate SchedWrite"); - SchedWrites.emplace_back(SchedWrites.size(), *SWI); + for (Record *SWDef : SWDefs) { + assert(!getSchedRWIdx(SWDef, /*IsRead=*/false) && "duplicate SchedWrite"); + SchedWrites.emplace_back(SchedWrites.size(), SWDef); } std::sort(SRDefs.begin(), SRDefs.end(), LessRecord()); - for (RecIter SRI = SRDefs.begin(), SRE = SRDefs.end(); SRI != SRE; ++SRI) { - assert(!getSchedRWIdx(*SRI, /*IsRead-*/true) && "duplicate SchedWrite"); - SchedReads.emplace_back(SchedReads.size(), *SRI); + for (Record *SRDef : SRDefs) { + assert(!getSchedRWIdx(SRDef, /*IsRead-*/true) && "duplicate SchedWrite"); + SchedReads.emplace_back(SchedReads.size(), SRDef); } // Initialize WriteSequence vectors. - for (std::vector<CodeGenSchedRW>::iterator WI = SchedWrites.begin(), - WE = SchedWrites.end(); WI != WE; ++WI) { - if (!WI->IsSequence) + for (CodeGenSchedRW &CGRW : SchedWrites) { + if (!CGRW.IsSequence) continue; - findRWs(WI->TheDef->getValueAsListOfDefs("Writes"), WI->Sequence, + findRWs(CGRW.TheDef->getValueAsListOfDefs("Writes"), CGRW.Sequence, /*IsRead=*/false); } // Initialize Aliases vectors. 
- for (RecIter AI = AliasDefs.begin(), AE = AliasDefs.end(); AI != AE; ++AI) { - Record *AliasDef = (*AI)->getValueAsDef("AliasRW"); + for (Record *ADef : AliasDefs) { + Record *AliasDef = ADef->getValueAsDef("AliasRW"); getSchedRW(AliasDef).IsAlias = true; - Record *MatchDef = (*AI)->getValueAsDef("MatchRW"); + Record *MatchDef = ADef->getValueAsDef("MatchRW"); CodeGenSchedRW &RW = getSchedRW(MatchDef); if (RW.IsAlias) - PrintFatalError((*AI)->getLoc(), "Cannot Alias an Alias"); - RW.Aliases.push_back(*AI); + PrintFatalError(ADef->getLoc(), "Cannot Alias an Alias"); + RW.Aliases.push_back(ADef); } DEBUG( dbgs() << "\n+++ SCHED READS and WRITES (collectSchedRW) +++\n"; @@ -329,12 +325,11 @@ void CodeGenSchedModels::collectSchedRW() { dbgs() << '\n'; } RecVec RWDefs = Records.getAllDerivedDefinitions("SchedReadWrite"); - for (RecIter RI = RWDefs.begin(), RE = RWDefs.end(); - RI != RE; ++RI) { - if (!getSchedRWIdx(*RI, (*RI)->isSubClassOf("SchedRead"))) { - const std::string &Name = (*RI)->getName(); + for (Record *RWDef : RWDefs) { + if (!getSchedRWIdx(RWDef, RWDef->isSubClassOf("SchedRead"))) { + const std::string &Name = RWDef->getName(); if (Name != "NoWrite" && Name != "ReadDefault") - dbgs() << "Unused SchedReadWrite " << (*RI)->getName() << '\n'; + dbgs() << "Unused SchedReadWrite " << RWDef->getName() << '\n'; } }); } @@ -364,8 +359,8 @@ unsigned CodeGenSchedModels::getSchedRWIdx(Record *Def, bool IsRead, } bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const { - for (unsigned i = 0, e = SchedReads.size(); i < e; ++i) { - Record *ReadDef = SchedReads[i].TheDef; + for (const CodeGenSchedRW &Read : SchedReads) { + Record *ReadDef = Read.TheDef; if (!ReadDef || !ReadDef->isSubClassOf("ProcReadAdvance")) continue; @@ -381,12 +376,12 @@ namespace llvm { void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs, RecVec &ReadDefs) { - for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); RWI != RWE; ++RWI) { - if ((*RWI)->isSubClassOf("SchedWrite")) - WriteDefs.push_back(*RWI); + for (Record *RWDef : RWDefs) { + if (RWDef->isSubClassOf("SchedWrite")) + WriteDefs.push_back(RWDef); else { - assert((*RWI)->isSubClassOf("SchedRead") && "unknown SchedReadWrite"); - ReadDefs.push_back(*RWI); + assert(RWDef->isSubClassOf("SchedRead") && "unknown SchedReadWrite"); + ReadDefs.push_back(RWDef); } } } @@ -406,8 +401,8 @@ void CodeGenSchedModels::findRWs(const RecVec &RWDefs, // Call getSchedRWIdx for all elements in a sequence of SchedRW defs. void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &RWs, bool IsRead) const { - for (RecIter RI = RWDefs.begin(), RE = RWDefs.end(); RI != RE; ++RI) { - unsigned Idx = getSchedRWIdx(*RI, IsRead); + for (Record *RWDef : RWDefs) { + unsigned Idx = getSchedRWIdx(RWDef, IsRead); assert(Idx && "failed to collect SchedReadWrite"); RWs.push_back(Idx); } @@ -423,9 +418,8 @@ void CodeGenSchedModels::expandRWSequence(unsigned RWIdx, IdxVec &RWSeq, int Repeat = SchedRW.TheDef ? SchedRW.TheDef->getValueAsInt("Repeat") : 1; for (int i = 0; i < Repeat; ++i) { - for (IdxIter I = SchedRW.Sequence.begin(), E = SchedRW.Sequence.end(); - I != E; ++I) { - expandRWSequence(*I, RWSeq, IsRead); + for (unsigned I : SchedRW.Sequence) { + expandRWSequence(I, RWSeq, IsRead); } } } @@ -464,9 +458,8 @@ void CodeGenSchedModels::expandRWSeqForProc( int Repeat = SchedWrite.TheDef ? 
SchedWrite.TheDef->getValueAsInt("Repeat") : 1; for (int i = 0; i < Repeat; ++i) { - for (IdxIter I = SchedWrite.Sequence.begin(), E = SchedWrite.Sequence.end(); - I != E; ++I) { - expandRWSeqForProc(*I, RWSeq, IsRead, ProcModel); + for (unsigned I : SchedWrite.Sequence) { + expandRWSeqForProc(I, RWSeq, IsRead, ProcModel); } } } @@ -535,8 +528,8 @@ void CodeGenSchedModels::collectSchedClasses() { RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW"); std::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord()); DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n"); - for (RecIter OI = InstRWDefs.begin(), OE = InstRWDefs.end(); OI != OE; ++OI) - createInstRWClass(*OI); + for (Record *RWDef : InstRWDefs) + createInstRWClass(RWDef); NumInstrSchedClasses = SchedClasses.size(); @@ -575,29 +568,27 @@ void CodeGenSchedModels::collectSchedClasses() { dbgs() << '\n'; } const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs; - for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); - RWI != RWE; ++RWI) { + for (Record *RWDef : RWDefs) { const CodeGenProcModel &ProcModel = - getProcModel((*RWI)->getValueAsDef("SchedModel")); + getProcModel(RWDef->getValueAsDef("SchedModel")); ProcIndices.push_back(ProcModel.Index); dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName; IdxVec Writes; IdxVec Reads; - findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"), + findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); - for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI) - dbgs() << " " << SchedWrites[*WI].Name; - for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI) - dbgs() << " " << SchedReads[*RI].Name; + for (unsigned WIdx : Writes) + dbgs() << " " << SchedWrites[WIdx].Name; + for (unsigned RIdx : Reads) + dbgs() << " " << SchedReads[RIdx].Name; dbgs() << '\n'; } // If ProcIndices contains zero, the class applies to all processors. if (!std::count(ProcIndices.begin(), ProcIndices.end(), 0)) { - for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(), - PE = ProcModels.end(); PI != PE; ++PI) { - if (!std::count(ProcIndices.begin(), ProcIndices.end(), PI->Index)) + for (const CodeGenProcModel &PM : ProcModels) { + if (!std::count(ProcIndices.begin(), ProcIndices.end(), PM.Index)) dbgs() << "No machine model for " << Inst->TheDef->getName() - << " on processor " << PI->ModelName << '\n'; + << " on processor " << PM.ModelName << '\n'; } } } @@ -700,10 +691,10 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { if (InstDefs->empty()) PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes"); - for (RecIter I = InstDefs->begin(), E = InstDefs->end(); I != E; ++I) { - InstClassMapTy::const_iterator Pos = InstrClassMap.find(*I); + for (Record *InstDef : make_range(InstDefs->begin(), InstDefs->end())) { + InstClassMapTy::const_iterator Pos = InstrClassMap.find(InstDef); if (Pos == InstrClassMap.end()) - PrintFatalError((*I)->getLoc(), "No sched class for instruction."); + PrintFatalError(InstDef->getLoc(), "No sched class for instruction."); unsigned SCIdx = Pos->second; unsigned CIdx = 0, CEnd = ClassInstrs.size(); for (; CIdx != CEnd; ++CIdx) { @@ -714,7 +705,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { ClassInstrs.resize(CEnd + 1); ClassInstrs[CIdx].first = SCIdx; } - ClassInstrs[CIdx].second.push_back(*I); + ClassInstrs[CIdx].second.push_back(InstDef); } // For each set of Instrs, create a new class if necessary, and map or remap // the Instrs to it. 
@@ -729,9 +720,8 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { if (!RWDefs.empty()) { const RecVec *OrigInstDefs = Sets.expand(RWDefs[0]); unsigned OrigNumInstrs = 0; - for (RecIter I = OrigInstDefs->begin(), E = OrigInstDefs->end(); - I != E; ++I) { - if (InstrClassMap[*I] == OldSCIdx) + for (Record *OIDef : make_range(OrigInstDefs->begin(), OrigInstDefs->end())) { + if (InstrClassMap[OIDef] == OldSCIdx) ++OrigNumInstrs; } if (OrigNumInstrs == InstDefs.size()) { @@ -785,9 +775,8 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { // True if collectProcItins found anything. bool CodeGenSchedModels::hasItineraries() const { - for (CodeGenSchedModels::ProcIter PI = procModelBegin(), PE = procModelEnd(); - PI != PE; ++PI) { - if (PI->hasItineraries()) + for (const CodeGenProcModel &PM : make_range(procModelBegin(),procModelEnd())) { + if (PM.hasItineraries()) return true; } return false; @@ -808,8 +797,7 @@ void CodeGenSchedModels::collectProcItins() { // Insert each itinerary data record in the correct position within // the processor model's ItinDefList. - for (unsigned i = 0, N = ItinRecords.size(); i < N; i++) { - Record *ItinData = ItinRecords[i]; + for (Record *ItinData : ItinRecords) { Record *ItinDef = ItinData->getValueAsDef("TheClass"); bool FoundClass = false; for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd(); @@ -841,16 +829,16 @@ void CodeGenSchedModels::collectProcItins() { void CodeGenSchedModels::collectProcItinRW() { RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW"); std::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord()); - for (RecIter II = ItinRWDefs.begin(), IE = ItinRWDefs.end(); II != IE; ++II) { - if (!(*II)->getValueInit("SchedModel")->isComplete()) - PrintFatalError((*II)->getLoc(), "SchedModel is undefined"); - Record *ModelDef = (*II)->getValueAsDef("SchedModel"); + for (Record *RWDef : ItinRWDefs) { + if (!RWDef->getValueInit("SchedModel")->isComplete()) + PrintFatalError(RWDef->getLoc(), "SchedModel is undefined"); + Record *ModelDef = RWDef->getValueAsDef("SchedModel"); ProcModelMapTy::const_iterator I = ProcModelMap.find(ModelDef); if (I == ProcModelMap.end()) { - PrintFatalError((*II)->getLoc(), "Undefined SchedMachineModel " + PrintFatalError(RWDef->getLoc(), "Undefined SchedMachineModel " + ModelDef->getName()); } - ProcModels[I->second].ItinRWDefs.push_back(*II); + ProcModels[I->second].ItinRWDefs.push_back(RWDef); } } @@ -1006,12 +994,11 @@ private: // conditions implicitly negate any prior condition. 
bool PredTransitions::mutuallyExclusive(Record *PredDef, ArrayRef<PredCheck> Term) { - for (ArrayRef<PredCheck>::iterator I = Term.begin(), E = Term.end(); - I != E; ++I) { - if (I->Predicate == PredDef) + for (const PredCheck &PC: Term) { + if (PC.Predicate == PredDef) return false; - const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(I->RWIdx, I->IsRead); + const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(PC.RWIdx, PC.IsRead); assert(SchedRW.HasVariants && "PredCheck must refer to a SchedVariant"); RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants"); for (RecIter VI = Variants.begin(), VE = Variants.end(); VI != VE; ++VI) { @@ -1027,9 +1014,9 @@ static bool hasAliasedVariants(const CodeGenSchedRW &RW, if (RW.HasVariants) return true; - for (RecIter I = RW.Aliases.begin(), E = RW.Aliases.end(); I != E; ++I) { + for (Record *Alias : RW.Aliases) { const CodeGenSchedRW &AliasRW = - SchedModels.getSchedRW((*I)->getValueAsDef("AliasRW")); + SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW")); if (AliasRW.HasVariants) return true; if (AliasRW.IsSequence) { @@ -1092,8 +1079,8 @@ void PredTransitions::getIntersectingVariants( } // Push each variant. Assign TransVecIdx later. const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants"); - for (RecIter RI = VarDefs.begin(), RE = VarDefs.end(); RI != RE; ++RI) - Variants.push_back(TransVariant(*RI, SchedRW.Index, VarProcIdx, 0)); + for (Record *VarDef : VarDefs) + Variants.push_back(TransVariant(VarDef, SchedRW.Index, VarProcIdx, 0)); if (VarProcIdx == 0) GenericRW = true; } @@ -1112,8 +1099,8 @@ void PredTransitions::getIntersectingVariants( if (AliasRW.HasVariants) { const RecVec VarDefs = AliasRW.TheDef->getValueAsListOfDefs("Variants"); - for (RecIter RI = VarDefs.begin(), RE = VarDefs.end(); RI != RE; ++RI) - Variants.push_back(TransVariant(*RI, AliasRW.Index, AliasProcIdx, 0)); + for (Record *VD : VarDefs) + Variants.push_back(TransVariant(VD, AliasRW.Index, AliasProcIdx, 0)); } if (AliasRW.IsSequence) { Variants.push_back( @@ -1122,12 +1109,11 @@ void PredTransitions::getIntersectingVariants( if (AliasProcIdx == 0) GenericRW = true; } - for (unsigned VIdx = 0, VEnd = Variants.size(); VIdx != VEnd; ++VIdx) { - TransVariant &Variant = Variants[VIdx]; + for (TransVariant &Variant : Variants) { // Don't expand variants if the processor models don't intersect. // A zero processor index means any processor. SmallVectorImpl<unsigned> &ProcIndices = TransVec[TransIdx].ProcIndices; - if (ProcIndices[0] && Variants[VIdx].ProcIdx) { + if (ProcIndices[0] && Variant.ProcIdx) { unsigned Cnt = std::count(ProcIndices.begin(), ProcIndices.end(), Variant.ProcIdx); if (!Cnt) @@ -1492,37 +1478,36 @@ void CodeGenSchedModels::collectProcResources() { } // Add resources separately defined by each subtarget. 
RecVec WRDefs = Records.getAllDerivedDefinitions("WriteRes"); - for (RecIter WRI = WRDefs.begin(), WRE = WRDefs.end(); WRI != WRE; ++WRI) { - Record *ModelDef = (*WRI)->getValueAsDef("SchedModel"); - addWriteRes(*WRI, getProcModel(ModelDef).Index); + for (Record *WR : WRDefs) { + Record *ModelDef = WR->getValueAsDef("SchedModel"); + addWriteRes(WR, getProcModel(ModelDef).Index); } RecVec SWRDefs = Records.getAllDerivedDefinitions("SchedWriteRes"); - for (RecIter WRI = SWRDefs.begin(), WRE = SWRDefs.end(); WRI != WRE; ++WRI) { - Record *ModelDef = (*WRI)->getValueAsDef("SchedModel"); - addWriteRes(*WRI, getProcModel(ModelDef).Index); + for (Record *SWR : SWRDefs) { + Record *ModelDef = SWR->getValueAsDef("SchedModel"); + addWriteRes(SWR, getProcModel(ModelDef).Index); } RecVec RADefs = Records.getAllDerivedDefinitions("ReadAdvance"); - for (RecIter RAI = RADefs.begin(), RAE = RADefs.end(); RAI != RAE; ++RAI) { - Record *ModelDef = (*RAI)->getValueAsDef("SchedModel"); - addReadAdvance(*RAI, getProcModel(ModelDef).Index); + for (Record *RA : RADefs) { + Record *ModelDef = RA->getValueAsDef("SchedModel"); + addReadAdvance(RA, getProcModel(ModelDef).Index); } RecVec SRADefs = Records.getAllDerivedDefinitions("SchedReadAdvance"); - for (RecIter RAI = SRADefs.begin(), RAE = SRADefs.end(); RAI != RAE; ++RAI) { - if ((*RAI)->getValueInit("SchedModel")->isComplete()) { - Record *ModelDef = (*RAI)->getValueAsDef("SchedModel"); - addReadAdvance(*RAI, getProcModel(ModelDef).Index); + for (Record *SRA : SRADefs) { + if (SRA->getValueInit("SchedModel")->isComplete()) { + Record *ModelDef = SRA->getValueAsDef("SchedModel"); + addReadAdvance(SRA, getProcModel(ModelDef).Index); } } // Add ProcResGroups that are defined within this processor model, which may // not be directly referenced but may directly specify a buffer size. RecVec ProcResGroups = Records.getAllDerivedDefinitions("ProcResGroup"); - for (RecIter RI = ProcResGroups.begin(), RE = ProcResGroups.end(); - RI != RE; ++RI) { - if (!(*RI)->getValueInit("SchedModel")->isComplete()) + for (Record *PRG : ProcResGroups) { + if (!PRG->getValueInit("SchedModel")->isComplete()) continue; - CodeGenProcModel &PM = getProcModel((*RI)->getValueAsDef("SchedModel")); - if (!is_contained(PM.ProcResourceDefs, *RI)) - PM.ProcResourceDefs.push_back(*RI); + CodeGenProcModel &PM = getProcModel(PRG->getValueAsDef("SchedModel")); + if (!is_contained(PM.ProcResourceDefs, PRG)) + PM.ProcResourceDefs.push_back(PRG); } // Finalize each ProcModel by sorting the record arrays. for (CodeGenProcModel &PM : ProcModels) { @@ -1687,7 +1672,8 @@ void CodeGenSchedModels::collectRWResources(ArrayRef<unsigned> Writes, // Find the processor's resource units for this kind of resource. 
Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind, - const CodeGenProcModel &PM) const { + const CodeGenProcModel &PM, + ArrayRef<SMLoc> Loc) const { if (ProcResKind->isSubClassOf("ProcResourceUnits")) return ProcResKind; @@ -1695,34 +1681,30 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind, assert(!ProcResourceDefs.empty()); assert(!ProcResGroups.empty()); - for (RecIter RI = ProcResourceDefs.begin(), RE = ProcResourceDefs.end(); - RI != RE; ++RI) { - - if ((*RI)->getValueAsDef("Kind") == ProcResKind - && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) { + for (Record *ProcResDef : ProcResourceDefs) { + if (ProcResDef->getValueAsDef("Kind") == ProcResKind + && ProcResDef->getValueAsDef("SchedModel") == PM.ModelDef) { if (ProcUnitDef) { - PrintFatalError((*RI)->getLoc(), + PrintFatalError(Loc, "Multiple ProcessorResourceUnits associated with " + ProcResKind->getName()); } - ProcUnitDef = *RI; + ProcUnitDef = ProcResDef; } } - for (RecIter RI = ProcResGroups.begin(), RE = ProcResGroups.end(); - RI != RE; ++RI) { - - if (*RI == ProcResKind - && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) { + for (Record *ProcResGroup : ProcResGroups) { + if (ProcResGroup == ProcResKind + && ProcResGroup->getValueAsDef("SchedModel") == PM.ModelDef) { if (ProcUnitDef) { - PrintFatalError((*RI)->getLoc(), + PrintFatalError(Loc, "Multiple ProcessorResourceUnits associated with " + ProcResKind->getName()); } - ProcUnitDef = *RI; + ProcUnitDef = ProcResGroup; } } if (!ProcUnitDef) { - PrintFatalError(ProcResKind->getLoc(), + PrintFatalError(Loc, "No ProcessorResources associated with " + ProcResKind->getName()); } @@ -1731,9 +1713,10 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind, // Iteratively add a resource and its super resources. void CodeGenSchedModels::addProcResource(Record *ProcResKind, - CodeGenProcModel &PM) { + CodeGenProcModel &PM, + ArrayRef<SMLoc> Loc) { while (true) { - Record *ProcResUnits = findProcResUnits(ProcResKind, PM); + Record *ProcResUnits = findProcResUnits(ProcResKind, PM, Loc); // See if this ProcResource is already associated with this processor. 
if (is_contained(PM.ProcResourceDefs, ProcResUnits)) @@ -1763,7 +1746,7 @@ void CodeGenSchedModels::addWriteRes(Record *ProcWriteResDef, unsigned PIdx) { RecVec ProcResDefs = ProcWriteResDef->getValueAsListOfDefs("ProcResources"); for (RecIter WritePRI = ProcResDefs.begin(), WritePRE = ProcResDefs.end(); WritePRI != WritePRE; ++WritePRI) { - addProcResource(*WritePRI, ProcModels[PIdx]); + addProcResource(*WritePRI, ProcModels[PIdx], ProcWriteResDef->getLoc()); } } @@ -1832,9 +1815,8 @@ void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const { dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n'; if (!Transitions.empty()) { dbgs() << "\n Transitions for Proc "; - for (std::vector<CodeGenSchedTransition>::const_iterator - TI = Transitions.begin(), TE = Transitions.end(); TI != TE; ++TI) { - dumpIdxVec(TI->ProcIndices); + for (const CodeGenSchedTransition &Transition : Transitions) { + dumpIdxVec(Transition.ProcIndices); } } } diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index 755ffd25b0cb..46e22cd12810 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -27,11 +27,11 @@ class CodeGenTarget; class CodeGenSchedModels; class CodeGenInstruction; -typedef std::vector<Record*> RecVec; -typedef std::vector<Record*>::const_iterator RecIter; +using RecVec = std::vector<Record*>; +using RecIter = std::vector<Record*>::const_iterator; -typedef std::vector<unsigned> IdxVec; -typedef std::vector<unsigned>::const_iterator IdxIter; +using IdxVec = std::vector<unsigned>; +using IdxIter = std::vector<unsigned>::const_iterator; void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs, RecVec &ReadDefs); @@ -234,7 +234,7 @@ class CodeGenSchedModels { std::vector<CodeGenProcModel> ProcModels; // Map Processor's MachineModel or ProcItin to a CodeGenProcModel index. - typedef DenseMap<Record*, unsigned> ProcModelMapTy; + using ProcModelMapTy = DenseMap<Record*, unsigned>; ProcModelMapTy ProcModelMap; // Per-operand SchedReadWrite types. @@ -252,15 +252,15 @@ class CodeGenSchedModels { // Map each instruction to its unique SchedClass index considering the // combination of it's itinerary class, SchedRW list, and InstRW records. - typedef DenseMap<Record*, unsigned> InstClassMapTy; + using InstClassMapTy = DenseMap<Record*, unsigned>; InstClassMapTy InstrClassMap; public: CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT); // iterator access to the scheduling classes. - typedef std::vector<CodeGenSchedClass>::iterator class_iterator; - typedef std::vector<CodeGenSchedClass>::const_iterator const_class_iterator; + using class_iterator = std::vector<CodeGenSchedClass>::iterator; + using const_class_iterator = std::vector<CodeGenSchedClass>::const_iterator; class_iterator classes_begin() { return SchedClasses.begin(); } const_class_iterator classes_begin() const { return SchedClasses.begin(); } class_iterator classes_end() { return SchedClasses.end(); } @@ -306,7 +306,7 @@ public: } // Iterate over the unique processor models. - typedef std::vector<CodeGenProcModel>::const_iterator ProcIter; + using ProcIter = std::vector<CodeGenProcModel>::const_iterator; ProcIter procModelBegin() const { return ProcModels.begin(); } ProcIter procModelEnd() const { return ProcModels.end(); } ArrayRef<CodeGenProcModel> procModels() const { return ProcModels; } @@ -360,7 +360,7 @@ public: // for NoItinerary. 
unsigned getSchedClassIdx(const CodeGenInstruction &Inst) const; - typedef std::vector<CodeGenSchedClass>::const_iterator SchedClassIter; + using SchedClassIter = std::vector<CodeGenSchedClass>::const_iterator; SchedClassIter schedClassBegin() const { return SchedClasses.begin(); } SchedClassIter schedClassEnd() const { return SchedClasses.end(); } ArrayRef<CodeGenSchedClass> schedClasses() const { return SchedClasses; } @@ -382,8 +382,8 @@ public: unsigned findSchedClassIdx(Record *ItinClassDef, ArrayRef<unsigned> Writes, ArrayRef<unsigned> Reads) const; - Record *findProcResUnits(Record *ProcResKind, - const CodeGenProcModel &PM) const; + Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM, + ArrayRef<SMLoc> Loc) const; private: void collectProcModels(); @@ -432,7 +432,8 @@ private: void collectRWResources(ArrayRef<unsigned> Writes, ArrayRef<unsigned> Reads, ArrayRef<unsigned> ProcIndices); - void addProcResource(Record *ProcResourceKind, CodeGenProcModel &PM); + void addProcResource(Record *ProcResourceKind, CodeGenProcModel &PM, + ArrayRef<SMLoc> Loc); void addWriteRes(Record *ProcWriteResDef, unsigned PIdx); diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 58df3ceceee7..827b6083c17f 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -82,6 +82,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v16i1: return "MVT::v16i1"; case MVT::v32i1: return "MVT::v32i1"; case MVT::v64i1: return "MVT::v64i1"; + case MVT::v128i1: return "MVT::v128i1"; case MVT::v512i1: return "MVT::v512i1"; case MVT::v1024i1: return "MVT::v1024i1"; case MVT::v1i8: return "MVT::v1i8"; @@ -191,7 +192,7 @@ std::string llvm::getQualifiedName(const Record *R) { /// getTarget - Return the current instance of the Target class. /// CodeGenTarget::CodeGenTarget(RecordKeeper &records) - : Records(records) { + : Records(records), CGH(records) { std::vector<Record*> Targets = Records.getAllDerivedDefinitions("Target"); if (Targets.size() == 0) PrintFatalError("ERROR: No 'Target' subclasses defined!"); @@ -266,7 +267,7 @@ Record *CodeGenTarget::getAsmWriter() const { CodeGenRegBank &CodeGenTarget::getRegBank() const { if (!RegBank) - RegBank = llvm::make_unique<CodeGenRegBank>(Records); + RegBank = llvm::make_unique<CodeGenRegBank>(Records, getHwModes()); return *RegBank; } @@ -285,19 +286,19 @@ const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const { return I->second; } -std::vector<MVT::SimpleValueType> CodeGenTarget:: -getRegisterVTs(Record *R) const { +std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R) + const { const CodeGenRegister *Reg = getRegBank().getReg(R); - std::vector<MVT::SimpleValueType> Result; + std::vector<ValueTypeByHwMode> Result; for (const auto &RC : getRegBank().getRegClasses()) { if (RC.contains(Reg)) { - ArrayRef<MVT::SimpleValueType> InVTs = RC.getValueTypes(); + ArrayRef<ValueTypeByHwMode> InVTs = RC.getValueTypes(); Result.insert(Result.end(), InVTs.begin(), InVTs.end()); } } // Remove duplicates. - array_pod_sort(Result.begin(), Result.end()); + std::sort(Result.begin(), Result.end()); Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); return Result; } @@ -308,7 +309,7 @@ void CodeGenTarget::ReadLegalValueTypes() const { LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end()); // Remove duplicates. 
- array_pod_sort(LegalValueTypes.begin(), LegalValueTypes.end()); + std::sort(LegalValueTypes.begin(), LegalValueTypes.end()); LegalValueTypes.erase(std::unique(LegalValueTypes.begin(), LegalValueTypes.end()), LegalValueTypes.end()); @@ -348,7 +349,7 @@ GetInstByName(const char *Name, void CodeGenTarget::ComputeInstrsByEnum() const { static const char *const FixedInstrs[] = { #define HANDLE_TARGET_OPCODE(OPC) #OPC, -#include "llvm/Target/TargetOpcodes.def" +#include "llvm/CodeGen/TargetOpcodes.def" nullptr}; const auto &Insts = getInstructions(); for (const char *const *p = FixedInstrs; *p; ++p) { diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index ff624ea559e5..89aa81b5fc33 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -17,8 +17,10 @@ #ifndef LLVM_UTILS_TABLEGEN_CODEGENTARGET_H #define LLVM_UTILS_TABLEGEN_CODEGENTARGET_H +#include "CodeGenHwModes.h" #include "CodeGenInstruction.h" #include "CodeGenRegisters.h" +#include "InfoByHwMode.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" #include <algorithm> @@ -69,7 +71,8 @@ class CodeGenTarget { std::unique_ptr<CodeGenInstruction>> Instructions; mutable std::unique_ptr<CodeGenRegBank> RegBank; mutable std::vector<Record*> RegAltNameIndices; - mutable SmallVector<MVT::SimpleValueType, 8> LegalValueTypes; + mutable SmallVector<ValueTypeByHwMode, 8> LegalValueTypes; + CodeGenHwModes CGH; void ReadRegAltNameIndices() const; void ReadInstructions() const; void ReadLegalValueTypes() const; @@ -128,22 +131,18 @@ public: /// getRegisterVTs - Find the union of all possible SimpleValueTypes for the /// specified physical register. - std::vector<MVT::SimpleValueType> getRegisterVTs(Record *R) const; + std::vector<ValueTypeByHwMode> getRegisterVTs(Record *R) const; - ArrayRef<MVT::SimpleValueType> getLegalValueTypes() const { - if (LegalValueTypes.empty()) ReadLegalValueTypes(); + ArrayRef<ValueTypeByHwMode> getLegalValueTypes() const { + if (LegalValueTypes.empty()) + ReadLegalValueTypes(); return LegalValueTypes; } - /// isLegalValueType - Return true if the specified value type is natively - /// supported by the target (i.e. there are registers that directly hold it). - bool isLegalValueType(MVT::SimpleValueType VT) const { - ArrayRef<MVT::SimpleValueType> LegalVTs = getLegalValueTypes(); - return is_contained(LegalVTs, VT); - } - CodeGenSchedModels &getSchedModels() const; + const CodeGenHwModes &getHwModes() const { return CGH; } + private: DenseMap<const Record*, std::unique_ptr<CodeGenInstruction>> & getInstructions() const { diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp index 60fe866f194d..9592ab7052f4 100644 --- a/utils/TableGen/DAGISelEmitter.cpp +++ b/utils/TableGen/DAGISelEmitter.cpp @@ -80,11 +80,11 @@ struct PatternSortingPredicate { CodeGenDAGPatterns &CGP; bool operator()(const PatternToMatch *LHS, const PatternToMatch *RHS) { - const TreePatternNode *LHSSrc = LHS->getSrcPattern(); - const TreePatternNode *RHSSrc = RHS->getSrcPattern(); + const TreePatternNode *LT = LHS->getSrcPattern(); + const TreePatternNode *RT = RHS->getSrcPattern(); - MVT LHSVT = (LHSSrc->getNumTypes() != 0 ? LHSSrc->getType(0) : MVT::Other); - MVT RHSVT = (RHSSrc->getNumTypes() != 0 ? RHSSrc->getType(0) : MVT::Other); + MVT LHSVT = LT->getNumTypes() != 0 ? LT->getSimpleType(0) : MVT::Other; + MVT RHSVT = RT->getNumTypes() != 0 ? 
RT->getSimpleType(0) : MVT::Other; if (LHSVT.isVector() != RHSVT.isVector()) return RHSVT.isVector(); @@ -127,6 +127,16 @@ void DAGISelEmitter::run(raw_ostream &OS) { << "// *** instruction selector class. These functions are really " << "methods.\n\n"; + OS << "// If GET_DAGISEL_DECL is #defined with any value, only function\n" + "// declarations will be included when this file is included.\n" + "// If GET_DAGISEL_BODY is #defined, its value should be the name of\n" + "// the instruction selector class. Function bodies will be emitted\n" + "// and each function's name will be qualified with the name of the\n" + "// class.\n" + "//\n" + "// When neither of the GET_DAGISEL* macros is defined, the functions\n" + "// are emitted inline.\n\n"; + DEBUG(errs() << "\n\nALL PATTERNS TO MATCH:\n\n"; for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(), E = CGP.ptm_end(); I != E; ++I) { diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp index 6ac3958e0f43..4a918d15691b 100644 --- a/utils/TableGen/DAGISelMatcher.cpp +++ b/utils/TableGen/DAGISelMatcher.cpp @@ -10,7 +10,6 @@ #include "DAGISelMatcher.h" #include "CodeGenDAGPatterns.h" #include "CodeGenTarget.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" using namespace llvm; @@ -80,18 +79,18 @@ bool Matcher::canMoveBeforeNode(const Matcher *Other) const { ScopeMatcher::~ScopeMatcher() { - for (unsigned i = 0, e = Children.size(); i != e; ++i) - delete Children[i]; + for (Matcher *C : Children) + delete C; } SwitchOpcodeMatcher::~SwitchOpcodeMatcher() { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - delete Cases[i].second; + for (auto &C : Cases) + delete C.second; } SwitchTypeMatcher::~SwitchTypeMatcher() { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - delete Cases[i].second; + for (auto &C : Cases) + delete C.second; } CheckPredicateMatcher::CheckPredicateMatcher(const TreePredicateFn &pred) @@ -107,11 +106,11 @@ TreePredicateFn CheckPredicateMatcher::getPredicate() const { void ScopeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "Scope\n"; - for (unsigned i = 0, e = getNumChildren(); i != e; ++i) { - if (!getChild(i)) + for (const Matcher *C : Children) { + if (!C) OS.indent(indent+1) << "NULL POINTER\n"; else - getChild(i)->print(OS, indent+2); + C->print(OS, indent+2); } } @@ -162,9 +161,9 @@ void CheckOpcodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { void SwitchOpcodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "SwitchOpcode: {\n"; - for (unsigned i = 0, e = Cases.size(); i != e; ++i) { - OS.indent(indent) << "case " << Cases[i].first->getEnumName() << ":\n"; - Cases[i].second->print(OS, indent+2); + for (const auto &C : Cases) { + OS.indent(indent) << "case " << C.first->getEnumName() << ":\n"; + C.second->print(OS, indent+2); } OS.indent(indent) << "}\n"; } @@ -177,9 +176,9 @@ void CheckTypeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { void SwitchTypeMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "SwitchType: {\n"; - for (unsigned i = 0, e = Cases.size(); i != e; ++i) { - OS.indent(indent) << "case " << getEnumName(Cases[i].first) << ":\n"; - Cases[i].second->print(OS, indent+2); + for (const auto &C : Cases) { + OS.indent(indent) << "case " << getEnumName(C.first) << ":\n"; + C.second->print(OS, indent+2); } OS.indent(indent) << "}\n"; } diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp 
b/utils/TableGen/DAGISelMatcherEmitter.cpp index 67e8f15b248e..e64943c1d025 100644 --- a/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -20,14 +20,16 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Format.h" #include "llvm/Support/SourceMgr.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" using namespace llvm; enum { - CommentIndent = 30 + IndexWidth = 6, + FullIndexWidth = IndexWidth + 4, + HistOpcWidth = 40, }; cl::OptionCategory DAGISelCat("Options for -gen-dag-isel"); @@ -45,14 +47,14 @@ static cl::opt<bool> InstrumentCoverage( namespace { class MatcherTableEmitter { const CodeGenDAGPatterns &CGP; - + DenseMap<TreePattern *, unsigned> NodePredicateMap; std::vector<TreePredicateFn> NodePredicates; // We de-duplicate the predicates by code string, and use this map to track // all the patterns with "identical" predicates. StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun; - + StringMap<unsigned> PatternPredicateMap; std::vector<std::string> PatternPredicates; @@ -81,17 +83,17 @@ public: : CGP(cgp) {} unsigned EmitMatcherList(const Matcher *N, unsigned Indent, - unsigned StartIdx, formatted_raw_ostream &OS); + unsigned StartIdx, raw_ostream &OS); - void EmitPredicateFunctions(formatted_raw_ostream &OS); + void EmitPredicateFunctions(raw_ostream &OS); - void EmitHistogram(const Matcher *N, formatted_raw_ostream &OS); + void EmitHistogram(const Matcher *N, raw_ostream &OS); void EmitPatternMatchTable(raw_ostream &OS); private: unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, - formatted_raw_ostream &OS); + raw_ostream &OS); unsigned getNodePredicate(TreePredicateFn Pred) { TreePattern *TP = Pred.getOrigPatFragRecord(); @@ -114,7 +116,7 @@ private: } return Entry-1; } - + unsigned getPatternPredicate(StringRef PredName) { unsigned &Entry = PatternPredicateMap[PredName]; if (Entry == 0) { @@ -206,13 +208,37 @@ static std::string getIncludePath(const Record *R) { return str; } +static void BeginEmitFunction(raw_ostream &OS, StringRef RetType, + StringRef Decl, bool AddOverride) { + OS << "#ifdef GET_DAGISEL_DECL\n"; + OS << RetType << ' ' << Decl; + if (AddOverride) + OS << " override"; + OS << ";\n" + "#endif\n" + "#if defined(GET_DAGISEL_BODY) || DAGISEL_INLINE\n"; + OS << RetType << " DAGISEL_CLASS_COLONCOLON " << Decl << "\n"; + if (AddOverride) { + OS << "#if DAGISEL_INLINE\n" + " override\n" + "#endif\n"; + } +} + +static void EndEmitFunction(raw_ostream &OS) { + OS << "#endif // GET_DAGISEL_BODY\n\n"; +} + void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) { assert(isUInt<16>(VecPatterns.size()) && "Using only 16 bits to encode offset into Pattern Table"); assert(VecPatterns.size() == VecIncludeStrings.size() && "The sizes of Pattern and include vectors should be the same"); - OS << "StringRef getPatternForIndex(unsigned Index) override {\n"; + + BeginEmitFunction(OS, "StringRef", "getPatternForIndex(unsigned Index)", + true/*AddOverride*/); + OS << "{\n"; OS << "static const char * PATTERN_MATCH_TABLE[] = {\n"; for (const auto &It : VecPatterns) { @@ -222,8 +248,11 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) { OS << "\n};"; OS << "\nreturn StringRef(PATTERN_MATCH_TABLE[Index]);"; OS << "\n}"; + EndEmitFunction(OS); - OS << "\nStringRef getIncludePathForIndex(unsigned Index) override {\n"; + BeginEmitFunction(OS, 
"StringRef", "getIncludePathForIndex(unsigned Index)", + true/*AddOverride*/); + OS << "{\n"; OS << "static const char * INCLUDE_PATH_TABLE[] = {\n"; for (const auto &It : VecIncludeStrings) { @@ -233,14 +262,15 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) { OS << "\n};"; OS << "\nreturn StringRef(INCLUDE_PATH_TABLE[Index]);"; OS << "\n}"; + EndEmitFunction(OS); } /// EmitMatcher - Emit bytes for the specified matcher and return /// the number of bytes emitted. unsigned MatcherTableEmitter:: EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, - formatted_raw_ostream &OS) { - OS.PadToColumn(Indent*2); + raw_ostream &OS) { + OS.indent(Indent*2); switch (N->getKind()) { case Matcher::Scope: { @@ -256,10 +286,10 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, ++CurrentIdx; } else { if (!OmitComments) { - OS << "/*" << CurrentIdx << "*/"; - OS.PadToColumn(Indent*2) << "/*Scope*/ "; + OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/"; + OS.indent(Indent*2) << "/*Scope*/ "; } else - OS.PadToColumn(Indent*2); + OS.indent(Indent*2); } // We need to encode the child and the offset of the failure code before @@ -275,9 +305,8 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, TmpBuf.clear(); raw_svector_ostream OS(TmpBuf); - formatted_raw_ostream FOS(OS); ChildSize = EmitMatcherList(SM->getChild(i), Indent+1, - CurrentIdx+VBRSize, FOS); + CurrentIdx+VBRSize, OS); } while (GetVBRSize(ChildSize) != VBRSize); assert(ChildSize != 0 && "Should not have a zero-sized child!"); @@ -287,8 +316,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << "/*->" << CurrentIdx+ChildSize << "*/"; if (i == 0) - OS.PadToColumn(CommentIndent) << "// " << SM->getNumChildren() - << " children in Scope"; + OS << " // " << SM->getNumChildren() << " children in Scope"; } OS << '\n' << TmpBuf; @@ -297,8 +325,8 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, // Emit a zero as a sentinel indicating end of 'Scope'. 
if (!OmitComments) - OS << "/*" << CurrentIdx << "*/"; - OS.PadToColumn(Indent*2) << "0, "; + OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/"; + OS.indent(Indent*2) << "0, "; if (!OmitComments) OS << "/*End of Scope*/"; OS << '\n'; @@ -308,9 +336,9 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, case Matcher::RecordNode: OS << "OPC_RecordNode,"; if (!OmitComments) - OS.PadToColumn(CommentIndent) << "// #" - << cast<RecordMatcher>(N)->getResultNo() << " = " - << cast<RecordMatcher>(N)->getWhatFor(); + OS << " // #" + << cast<RecordMatcher>(N)->getResultNo() << " = " + << cast<RecordMatcher>(N)->getWhatFor(); OS << '\n'; return 1; @@ -318,9 +346,9 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << "OPC_RecordChild" << cast<RecordChildMatcher>(N)->getChildNo() << ','; if (!OmitComments) - OS.PadToColumn(CommentIndent) << "// #" - << cast<RecordChildMatcher>(N)->getResultNo() << " = " - << cast<RecordChildMatcher>(N)->getWhatFor(); + OS << " // #" + << cast<RecordChildMatcher>(N)->getResultNo() << " = " + << cast<RecordChildMatcher>(N)->getWhatFor(); OS << '\n'; return 1; @@ -362,7 +390,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, StringRef Pred =cast<CheckPatternPredicateMatcher>(N)->getPredicate(); OS << "OPC_CheckPatternPredicate, " << getPatternPredicate(Pred) << ','; if (!OmitComments) - OS.PadToColumn(CommentIndent) << "// " << Pred; + OS << " // " << Pred; OS << '\n'; return 2; } @@ -370,7 +398,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate(); OS << "OPC_CheckPredicate, " << getNodePredicate(Pred) << ','; if (!OmitComments) - OS.PadToColumn(CommentIndent) << "// " << Pred.getFnName(); + OS << " // " << Pred.getFnName(); OS << '\n'; return 2; } @@ -423,17 +451,16 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, TmpBuf.clear(); raw_svector_ostream OS(TmpBuf); - formatted_raw_ostream FOS(OS); ChildSize = EmitMatcherList(Child, Indent+1, CurrentIdx+VBRSize+IdxSize, - FOS); + OS); } while (GetVBRSize(ChildSize) != VBRSize); assert(ChildSize != 0 && "Should not have a zero-sized child!"); if (i != 0) { if (!OmitComments) - OS << "/*" << CurrentIdx << "*/"; - OS.PadToColumn(Indent*2); + OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/"; + OS.indent(Indent*2); if (!OmitComments) OS << (isa<SwitchOpcodeMatcher>(N) ? "/*SwitchOpcode*/ " : "/*SwitchType*/ "); @@ -458,11 +485,11 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, // Emit the final zero to terminate the switch. if (!OmitComments) - OS << "/*" << CurrentIdx << "*/"; - OS.PadToColumn(Indent*2) << "0, "; + OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/"; + OS.indent(Indent*2) << "0,"; if (!OmitComments) OS << (isa<SwitchOpcodeMatcher>(N) ? 
- "// EndSwitchOpcode" : "// EndSwitchType"); + " // EndSwitchOpcode" : " // EndSwitchType"); OS << '\n'; ++CurrentIdx; @@ -470,11 +497,14 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, } case Matcher::CheckType: - assert(cast<CheckTypeMatcher>(N)->getResNo() == 0 && - "FIXME: Add support for CheckType of resno != 0"); - OS << "OPC_CheckType, " - << getEnumName(cast<CheckTypeMatcher>(N)->getType()) << ",\n"; - return 2; + if (cast<CheckTypeMatcher>(N)->getResNo() == 0) { + OS << "OPC_CheckType, " + << getEnumName(cast<CheckTypeMatcher>(N)->getType()) << ",\n"; + return 2; + } + OS << "OPC_CheckTypeRes, " << cast<CheckTypeMatcher>(N)->getResNo() + << ", " << getEnumName(cast<CheckTypeMatcher>(N)->getType()) << ",\n"; + return 3; case Matcher::CheckChildType: OS << "OPC_CheckChild" @@ -513,7 +543,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, << CCPM->getMatchNumber() << ','; if (!OmitComments) { - OS.PadToColumn(CommentIndent) << "// " << Pattern.getSelectFunc(); + OS << " // " << Pattern.getSelectFunc(); OS << ":$" << CCPM->getName(); for (unsigned i = 0, e = Pattern.getNumOperands(); i != e; ++i) OS << " #" << CCPM->getFirstResult()+i; @@ -615,7 +645,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << "OPC_EmitNodeXForm, " << getNodeXFormID(XF->getNodeXForm()) << ", " << XF->getSlot() << ','; if (!OmitComments) - OS.PadToColumn(CommentIndent) << "// "<<XF->getNodeXForm()->getName(); + OS << " // "<<XF->getNodeXForm()->getName(); OS <<'\n'; return 3; } @@ -636,7 +666,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, unsigned Offset = getPatternIdxFromTable(src + " -> " + dst, std::move(include_src)); OS << "TARGET_VAL(" << Offset << "),\n"; - OS.PadToColumn(Indent * 2); + OS.indent(FullIndexWidth + Indent * 2); } } const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N); @@ -655,7 +685,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands(); OS << ",\n"; - OS.PadToColumn(Indent*2+4); + OS.indent(FullIndexWidth + Indent*2+4); if (!CompressVTs) { OS << EN->getNumVTs(); if (!OmitComments) @@ -677,7 +707,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, // Print the result #'s for EmitNode. 
if (const EmitNodeMatcher *E = dyn_cast<EmitNodeMatcher>(EN)) { if (unsigned NumResults = EN->getNumVTs()) { - OS.PadToColumn(CommentIndent) << "// Results ="; + OS << " // Results ="; unsigned First = E->getFirstResultSlot(); for (unsigned i = 0; i != NumResults; ++i) OS << " #" << First+i; @@ -686,10 +716,10 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << '\n'; if (const MorphNodeToMatcher *SNT = dyn_cast<MorphNodeToMatcher>(N)) { - OS.PadToColumn(Indent*2) << "// Src: " + OS.indent(FullIndexWidth + Indent*2) << "// Src: " << *SNT->getPattern().getSrcPattern() << " - Complexity = " << SNT->getPattern().getPatternComplexity(CGP) << '\n'; - OS.PadToColumn(Indent*2) << "// Dst: " + OS.indent(FullIndexWidth + Indent*2) << "// Dst: " << *SNT->getPattern().getDstPattern() << '\n'; } } else @@ -713,7 +743,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, unsigned Offset = getPatternIdxFromTable(src + " -> " + dst, std::move(include_src)); OS << "TARGET_VAL(" << Offset << "),\n"; - OS.PadToColumn(Indent * 2); + OS.indent(FullIndexWidth + Indent * 2); } OS << "OPC_CompleteMatch, " << CM->getNumResults() << ", "; unsigned NumResultBytes = 0; @@ -721,10 +751,10 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, NumResultBytes += EmitVBRValue(CM->getResult(i), OS); OS << '\n'; if (!OmitComments) { - OS.PadToColumn(Indent*2) << "// Src: " + OS.indent(FullIndexWidth + Indent*2) << " // Src: " << *CM->getPattern().getSrcPattern() << " - Complexity = " << CM->getPattern().getPatternComplexity(CGP) << '\n'; - OS.PadToColumn(Indent*2) << "// Dst: " + OS.indent(FullIndexWidth + Indent*2) << " // Dst: " << *CM->getPattern().getDstPattern(); } OS << '\n'; @@ -737,11 +767,11 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, /// EmitMatcherList - Emit the bytes for the specified matcher subtree. unsigned MatcherTableEmitter:: EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx, - formatted_raw_ostream &OS) { + raw_ostream &OS) { unsigned Size = 0; while (N) { if (!OmitComments) - OS << "/*" << CurrentIdx << "*/"; + OS << "/*" << format_decimal(CurrentIdx, IndexWidth) << "*/"; unsigned MatcherSize = EmitMatcher(N, Indent, CurrentIdx, OS); Size += MatcherSize; CurrentIdx += MatcherSize; @@ -753,46 +783,55 @@ EmitMatcherList(const Matcher *N, unsigned Indent, unsigned CurrentIdx, return Size; } -void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) { +void MatcherTableEmitter::EmitPredicateFunctions(raw_ostream &OS) { // Emit pattern predicates. if (!PatternPredicates.empty()) { - OS << "bool CheckPatternPredicate(unsigned PredNo) const override {\n"; + BeginEmitFunction(OS, "bool", + "CheckPatternPredicate(unsigned PredNo) const", true/*AddOverride*/); + OS << "{\n"; OS << " switch (PredNo) {\n"; OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n"; for (unsigned i = 0, e = PatternPredicates.size(); i != e; ++i) OS << " case " << i << ": return " << PatternPredicates[i] << ";\n"; OS << " }\n"; - OS << "}\n\n"; + OS << "}\n"; + EndEmitFunction(OS); } // Emit Node predicates. 
if (!NodePredicates.empty()) { - OS << "bool CheckNodePredicate(SDNode *Node,\n"; - OS << " unsigned PredNo) const override {\n"; + BeginEmitFunction(OS, "bool", + "CheckNodePredicate(SDNode *Node, unsigned PredNo) const", + true/*AddOverride*/); + OS << "{\n"; OS << " switch (PredNo) {\n"; OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n"; for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) { // Emit the predicate code corresponding to this pattern. TreePredicateFn PredFn = NodePredicates[i]; - + assert(!PredFn.isAlwaysTrue() && "No code in this predicate"); OS << " case " << i << ": { \n"; for (auto *SimilarPred : NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()]) OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n'; - + OS << PredFn.getCodeToRunOnSDNode() << "\n }\n"; } OS << " }\n"; - OS << "}\n\n"; + OS << "}\n"; + EndEmitFunction(OS); } // Emit CompletePattern matchers. // FIXME: This should be const. if (!ComplexPatterns.empty()) { - OS << "bool CheckComplexPattern(SDNode *Root, SDNode *Parent,\n"; - OS << " SDValue N, unsigned PatternNo,\n"; - OS << " SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) override {\n"; + BeginEmitFunction(OS, "bool", + "CheckComplexPattern(SDNode *Root, SDNode *Parent,\n" + " SDValue N, unsigned PatternNo,\n" + " SmallVectorImpl<std::pair<SDValue, SDNode*>> &Result)", + true/*AddOverride*/); + OS << "{\n"; OS << " unsigned NextRes = Result.size();\n"; OS << " switch (PatternNo) {\n"; OS << " default: llvm_unreachable(\"Invalid pattern # in table?\");\n"; @@ -836,14 +875,17 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) { } } OS << " }\n"; - OS << "}\n\n"; + OS << "}\n"; + EndEmitFunction(OS); } // Emit SDNodeXForm handlers. // FIXME: This should be const. 
if (!NodeXForms.empty()) { - OS << "SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) override {\n"; + BeginEmitFunction(OS, "SDValue", + "RunSDNodeXForm(SDValue V, unsigned XFormNo)", true/*AddOverride*/); + OS << "{\n"; OS << " switch (XFormNo) {\n"; OS << " default: llvm_unreachable(\"Invalid xform # in table?\");\n"; @@ -869,7 +911,8 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) { OS << Code << "\n }\n"; } OS << " }\n"; - OS << "}\n\n"; + OS << "}\n"; + EndEmitFunction(OS); } } @@ -895,8 +938,51 @@ static void BuildHistogram(const Matcher *M, std::vector<unsigned> &OpcodeFreq){ } } +static StringRef getOpcodeString(Matcher::KindTy Kind) { + switch (Kind) { + case Matcher::Scope: return "OPC_Scope"; break; + case Matcher::RecordNode: return "OPC_RecordNode"; break; + case Matcher::RecordChild: return "OPC_RecordChild"; break; + case Matcher::RecordMemRef: return "OPC_RecordMemRef"; break; + case Matcher::CaptureGlueInput: return "OPC_CaptureGlueInput"; break; + case Matcher::MoveChild: return "OPC_MoveChild"; break; + case Matcher::MoveParent: return "OPC_MoveParent"; break; + case Matcher::CheckSame: return "OPC_CheckSame"; break; + case Matcher::CheckChildSame: return "OPC_CheckChildSame"; break; + case Matcher::CheckPatternPredicate: + return "OPC_CheckPatternPredicate"; break; + case Matcher::CheckPredicate: return "OPC_CheckPredicate"; break; + case Matcher::CheckOpcode: return "OPC_CheckOpcode"; break; + case Matcher::SwitchOpcode: return "OPC_SwitchOpcode"; break; + case Matcher::CheckType: return "OPC_CheckType"; break; + case Matcher::SwitchType: return "OPC_SwitchType"; break; + case Matcher::CheckChildType: return "OPC_CheckChildType"; break; + case Matcher::CheckInteger: return "OPC_CheckInteger"; break; + case Matcher::CheckChildInteger: return "OPC_CheckChildInteger"; break; + case Matcher::CheckCondCode: return "OPC_CheckCondCode"; break; + case Matcher::CheckValueType: return "OPC_CheckValueType"; break; + case Matcher::CheckComplexPat: return "OPC_CheckComplexPat"; break; + case Matcher::CheckAndImm: return "OPC_CheckAndImm"; break; + case Matcher::CheckOrImm: return "OPC_CheckOrImm"; break; + case Matcher::CheckFoldableChainNode: + return "OPC_CheckFoldableChainNode"; break; + case Matcher::EmitInteger: return "OPC_EmitInteger"; break; + case Matcher::EmitStringInteger: return "OPC_EmitStringInteger"; break; + case Matcher::EmitRegister: return "OPC_EmitRegister"; break; + case Matcher::EmitConvertToTarget: return "OPC_EmitConvertToTarget"; break; + case Matcher::EmitMergeInputChains: return "OPC_EmitMergeInputChains"; break; + case Matcher::EmitCopyToReg: return "OPC_EmitCopyToReg"; break; + case Matcher::EmitNode: return "OPC_EmitNode"; break; + case Matcher::MorphNodeTo: return "OPC_MorphNodeTo"; break; + case Matcher::EmitNodeXForm: return "OPC_EmitNodeXForm"; break; + case Matcher::CompleteMatch: return "OPC_CompleteMatch"; break; + } + + llvm_unreachable("Unhandled opcode?"); +} + void MatcherTableEmitter::EmitHistogram(const Matcher *M, - formatted_raw_ostream &OS) { + raw_ostream &OS) { if (OmitComments) return; @@ -905,47 +991,9 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M, OS << " // Opcode Histogram:\n"; for (unsigned i = 0, e = OpcodeFreq.size(); i != e; ++i) { - OS << " // #"; - switch ((Matcher::KindTy)i) { - case Matcher::Scope: OS << "OPC_Scope"; break; - case Matcher::RecordNode: OS << "OPC_RecordNode"; break; - case Matcher::RecordChild: OS << "OPC_RecordChild"; break; - case Matcher::RecordMemRef: OS 
<< "OPC_RecordMemRef"; break; - case Matcher::CaptureGlueInput: OS << "OPC_CaptureGlueInput"; break; - case Matcher::MoveChild: OS << "OPC_MoveChild"; break; - case Matcher::MoveParent: OS << "OPC_MoveParent"; break; - case Matcher::CheckSame: OS << "OPC_CheckSame"; break; - case Matcher::CheckChildSame: OS << "OPC_CheckChildSame"; break; - case Matcher::CheckPatternPredicate: - OS << "OPC_CheckPatternPredicate"; break; - case Matcher::CheckPredicate: OS << "OPC_CheckPredicate"; break; - case Matcher::CheckOpcode: OS << "OPC_CheckOpcode"; break; - case Matcher::SwitchOpcode: OS << "OPC_SwitchOpcode"; break; - case Matcher::CheckType: OS << "OPC_CheckType"; break; - case Matcher::SwitchType: OS << "OPC_SwitchType"; break; - case Matcher::CheckChildType: OS << "OPC_CheckChildType"; break; - case Matcher::CheckInteger: OS << "OPC_CheckInteger"; break; - case Matcher::CheckChildInteger: OS << "OPC_CheckChildInteger"; break; - case Matcher::CheckCondCode: OS << "OPC_CheckCondCode"; break; - case Matcher::CheckValueType: OS << "OPC_CheckValueType"; break; - case Matcher::CheckComplexPat: OS << "OPC_CheckComplexPat"; break; - case Matcher::CheckAndImm: OS << "OPC_CheckAndImm"; break; - case Matcher::CheckOrImm: OS << "OPC_CheckOrImm"; break; - case Matcher::CheckFoldableChainNode: - OS << "OPC_CheckFoldableChainNode"; break; - case Matcher::EmitInteger: OS << "OPC_EmitInteger"; break; - case Matcher::EmitStringInteger: OS << "OPC_EmitStringInteger"; break; - case Matcher::EmitRegister: OS << "OPC_EmitRegister"; break; - case Matcher::EmitConvertToTarget: OS << "OPC_EmitConvertToTarget"; break; - case Matcher::EmitMergeInputChains: OS << "OPC_EmitMergeInputChains"; break; - case Matcher::EmitCopyToReg: OS << "OPC_EmitCopyToReg"; break; - case Matcher::EmitNode: OS << "OPC_EmitNode"; break; - case Matcher::MorphNodeTo: OS << "OPC_MorphNodeTo"; break; - case Matcher::EmitNodeXForm: OS << "OPC_EmitNodeXForm"; break; - case Matcher::CompleteMatch: OS << "OPC_CompleteMatch"; break; - } - - OS.PadToColumn(40) << " = " << OpcodeFreq[i] << '\n'; + OS << " // #" + << left_justify(getOpcodeString((Matcher::KindTy)i), HistOpcWidth) + << " = " << OpcodeFreq[i] << '\n'; } OS << '\n'; } @@ -953,19 +1001,45 @@ void MatcherTableEmitter::EmitHistogram(const Matcher *M, void llvm::EmitMatcherTable(const Matcher *TheMatcher, const CodeGenDAGPatterns &CGP, - raw_ostream &O) { - formatted_raw_ostream OS(O); - - OS << "// The main instruction selector code.\n"; - OS << "void SelectCode(SDNode *N) {\n"; - + raw_ostream &OS) { + OS << "#if defined(GET_DAGISEL_DECL) && defined(GET_DAGISEL_BODY)\n"; + OS << "#error GET_DAGISEL_DECL and GET_DAGISEL_BODY cannot be both defined, "; + OS << "undef both for inline definitions\n"; + OS << "#endif\n\n"; + + // Emit a check for omitted class name. 
+ OS << "#ifdef GET_DAGISEL_BODY\n"; + OS << "#define LOCAL_DAGISEL_STRINGIZE(X) LOCAL_DAGISEL_STRINGIZE_(X)\n"; + OS << "#define LOCAL_DAGISEL_STRINGIZE_(X) #X\n"; + OS << "static_assert(sizeof(LOCAL_DAGISEL_STRINGIZE(GET_DAGISEL_BODY)) > 1," + "\n"; + OS << " \"GET_DAGISEL_BODY is empty: it should be defined with the class " + "name\");\n"; + OS << "#undef LOCAL_DAGISEL_STRINGIZE_\n"; + OS << "#undef LOCAL_DAGISEL_STRINGIZE\n"; + OS << "#endif\n\n"; + + OS << "#if !defined(GET_DAGISEL_DECL) && !defined(GET_DAGISEL_BODY)\n"; + OS << "#define DAGISEL_INLINE 1\n"; + OS << "#else\n"; + OS << "#define DAGISEL_INLINE 0\n"; + OS << "#endif\n\n"; + + OS << "#if !DAGISEL_INLINE\n"; + OS << "#define DAGISEL_CLASS_COLONCOLON GET_DAGISEL_BODY ::\n"; + OS << "#else\n"; + OS << "#define DAGISEL_CLASS_COLONCOLON\n"; + OS << "#endif\n\n"; + + BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/); MatcherTableEmitter MatcherEmitter(CGP); + OS << "{\n"; OS << " // Some target values are emitted as 2 bytes, TARGET_VAL handles\n"; OS << " // this.\n"; OS << " #define TARGET_VAL(X) X & 255, unsigned(X) >> 8\n"; OS << " static const unsigned char MatcherTable[] = {\n"; - unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 6, 0, OS); + unsigned TotalSize = MatcherEmitter.EmitMatcherList(TheMatcher, 1, 0, OS); OS << " 0\n }; // Total Array size is " << (TotalSize+1) << " bytes\n\n"; MatcherEmitter.EmitHistogram(TheMatcher, OS); @@ -973,10 +1047,26 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher, OS << " #undef TARGET_VAL\n"; OS << " SelectCodeCommon(N, MatcherTable,sizeof(MatcherTable));\n"; OS << "}\n"; + EndEmitFunction(OS); // Next up, emit the function for node and pattern predicates: MatcherEmitter.EmitPredicateFunctions(OS); if (InstrumentCoverage) MatcherEmitter.EmitPatternMatchTable(OS); + + // Clean up the preprocessor macros. + OS << "\n"; + OS << "#ifdef DAGISEL_INLINE\n"; + OS << "#undef DAGISEL_INLINE\n"; + OS << "#endif\n"; + OS << "#ifdef DAGISEL_CLASS_COLONCOLON\n"; + OS << "#undef DAGISEL_CLASS_COLONCOLON\n"; + OS << "#endif\n"; + OS << "#ifdef GET_DAGISEL_DECL\n"; + OS << "#undef GET_DAGISEL_DECL\n"; + OS << "#endif\n"; + OS << "#ifdef GET_DAGISEL_BODY\n"; + OS << "#undef GET_DAGISEL_BODY\n"; + OS << "#endif\n"; } diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index d4a56a64324f..a19b9e4b95c7 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -33,12 +33,18 @@ static MVT::SimpleValueType getRegisterValueType(Record *R, if (!FoundRC) { FoundRC = true; - VT = RC.getValueTypeNum(0); + ValueTypeByHwMode VVT = RC.getValueTypeNum(0); + if (VVT.isSimple()) + VT = VVT.getSimple().SimpleTy; continue; } // If this occurs in multiple register classes, they all have to agree. - assert(VT == RC.getValueTypeNum(0)); +#ifndef NDEBUG + ValueTypeByHwMode T = RC.getValueTypeNum(0); + assert((!T.isSimple() || T.getSimple().SimpleTy == VT) && + "ValueType mismatch between register classes for this register"); +#endif } return VT; } @@ -105,13 +111,15 @@ namespace { Matcher *GetMatcher() const { return TheMatcher; } private: void AddMatcher(Matcher *NewNode); - void InferPossibleTypes(); + void InferPossibleTypes(unsigned ForceMode); // Matcher Generation. 
- void EmitMatchCode(const TreePatternNode *N, TreePatternNode *NodeNoTypes); + void EmitMatchCode(const TreePatternNode *N, TreePatternNode *NodeNoTypes, + unsigned ForceMode); void EmitLeafMatchCode(const TreePatternNode *N); void EmitOperatorMatchCode(const TreePatternNode *N, - TreePatternNode *NodeNoTypes); + TreePatternNode *NodeNoTypes, + unsigned ForceMode); /// If this is the first time a node with unique identifier Name has been /// seen, record it. Otherwise, emit a check to make sure this is the same @@ -164,17 +172,19 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern, PatWithNoTypes->RemoveAllTypes(); // If there are types that are manifestly known, infer them. - InferPossibleTypes(); + InferPossibleTypes(Pattern.ForceMode); } /// InferPossibleTypes - As we emit the pattern, we end up generating type /// checks and applying them to the 'PatWithNoTypes' tree. As we do this, we /// want to propagate implied types as far throughout the tree as possible so /// that we avoid doing redundant type checks. This does the type propagation. -void MatcherGen::InferPossibleTypes() { +void MatcherGen::InferPossibleTypes(unsigned ForceMode) { // TP - Get *SOME* tree pattern, we don't care which. It is only used for // diagnostics, which we know are impossible at this point. TreePattern &TP = *CGP.pf_begin()->second; + TP.getInfer().CodeGen = true; + TP.getInfer().ForceMode = ForceMode; bool MadeChange = true; while (MadeChange) @@ -281,7 +291,8 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) { } void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N, - TreePatternNode *NodeNoTypes) { + TreePatternNode *NodeNoTypes, + unsigned ForceMode) { assert(!N->isLeaf() && "Not an operator?"); if (N->getOperator()->isSubClassOf("ComplexPattern")) { @@ -305,7 +316,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N, const SDNodeInfo &CInfo = CGP.getSDNodeInfo(N->getOperator()); // If this is an 'and R, 1234' where the operation is AND/OR and the RHS is - // a constant without a predicate fn that has more that one bit set, handle + // a constant without a predicate fn that has more than one bit set, handle // this as a special case. This is usually for targets that have special // handling of certain large constants (e.g. alpha with it's 8/16/32-bit // handling stuff). Using these instructions is often far more efficient @@ -334,7 +345,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N, // Match the LHS of the AND as appropriate. AddMatcher(new MoveChildMatcher(0)); - EmitMatchCode(N->getChild(0), NodeNoTypes->getChild(0)); + EmitMatchCode(N->getChild(0), NodeNoTypes->getChild(0), ForceMode); AddMatcher(new MoveParentMatcher()); return; } @@ -433,7 +444,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N, // Get the code suitable for matching this child. Move to the child, check // it then move back to the parent. AddMatcher(new MoveChildMatcher(OpNo)); - EmitMatchCode(N->getChild(i), NodeNoTypes->getChild(i)); + EmitMatchCode(N->getChild(i), NodeNoTypes->getChild(i), ForceMode); AddMatcher(new MoveParentMatcher()); } } @@ -456,7 +467,8 @@ bool MatcherGen::recordUniqueNode(const std::string &Name) { } void MatcherGen::EmitMatchCode(const TreePatternNode *N, - TreePatternNode *NodeNoTypes) { + TreePatternNode *NodeNoTypes, + unsigned ForceMode) { // If N and NodeNoTypes don't agree on a type, then this is a case where we // need to do a type check. 
Emit the check, apply the type to NodeNoTypes and // reinfer any correlated types. @@ -465,7 +477,7 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N, for (unsigned i = 0, e = NodeNoTypes->getNumTypes(); i != e; ++i) { if (NodeNoTypes->getExtType(i) == N->getExtType(i)) continue; NodeNoTypes->setType(i, N->getExtType(i)); - InferPossibleTypes(); + InferPossibleTypes(ForceMode); ResultsToTypeCheck.push_back(i); } @@ -478,14 +490,14 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N, if (N->isLeaf()) EmitLeafMatchCode(N); else - EmitOperatorMatchCode(N, NodeNoTypes); + EmitOperatorMatchCode(N, NodeNoTypes, ForceMode); // If there are node predicates for this node, generate their checks. for (unsigned i = 0, e = N->getPredicateFns().size(); i != e; ++i) AddMatcher(new CheckPredicateMatcher(N->getPredicateFns()[i])); for (unsigned i = 0, e = ResultsToTypeCheck.size(); i != e; ++i) - AddMatcher(new CheckTypeMatcher(N->getType(ResultsToTypeCheck[i]), + AddMatcher(new CheckTypeMatcher(N->getSimpleType(ResultsToTypeCheck[i]), ResultsToTypeCheck[i])); } @@ -509,7 +521,7 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) { } // Emit the matcher for the pattern structure and types. - EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes); + EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes, Pattern.ForceMode); // If the pattern has a predicate on it (e.g. only enabled when a subtarget // feature is around, do the check). @@ -606,7 +618,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N, assert(N->isLeaf() && "Must be a leaf"); if (IntInit *II = dyn_cast<IntInit>(N->getLeafValue())) { - AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getType(0))); + AddMatcher(new EmitIntegerMatcher(II->getValue(), N->getSimpleType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } @@ -617,13 +629,13 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N, if (Def->isSubClassOf("Register")) { const CodeGenRegister *Reg = CGP.getTargetInfo().getRegBank().getReg(Def); - AddMatcher(new EmitRegisterMatcher(Reg, N->getType(0))); + AddMatcher(new EmitRegisterMatcher(Reg, N->getSimpleType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } if (Def->getName() == "zero_reg") { - AddMatcher(new EmitRegisterMatcher(nullptr, N->getType(0))); + AddMatcher(new EmitRegisterMatcher(nullptr, N->getSimpleType(0))); ResultOps.push_back(NextRecordedOperandNo++); return; } @@ -834,7 +846,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, // Determine the result types. SmallVector<MVT::SimpleValueType, 4> ResultVTs; for (unsigned i = 0, e = N->getNumTypes(); i != e; ++i) - ResultVTs.push_back(N->getType(i)); + ResultVTs.push_back(N->getSimpleType(i)); // If this is the root instruction of a pattern that has physical registers in // its result pattern, add output VTs for them. For example, X86 has: diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp index 25388b75cc0d..610f4d21bf2d 100644 --- a/utils/TableGen/FastISelEmitter.cpp +++ b/utils/TableGen/FastISelEmitter.cpp @@ -159,10 +159,11 @@ struct OperandsSignature { TreePredicateFn PredFn = ImmPredicates.getPredicate(Code-1); // Emit the type check. 
- OS << "VT == " - << getEnumName(PredFn.getOrigPatFragRecord()->getTree(0)->getType(0)) - << " && "; - + TreePattern *TP = PredFn.getOrigPatFragRecord(); + ValueTypeByHwMode VVT = TP->getTree(0)->getType(0); + assert(VVT.isSimple() && + "Cannot use variable value types with fast isel"); + OS << "VT == " << getEnumName(VVT.getSimple().SimpleTy) << " && "; OS << PredFn.getFnName() << "(imm" << i <<')'; EmittedAnything = true; @@ -217,10 +218,6 @@ struct OperandsSignature { PredNo = ImmediatePredicates.getIDFor(PredFn)+1; } - // Handle unmatched immediate sizes here. - //if (Op->getType(0) != VT) - // return false; - Operands.push_back(OpKind::getImm(PredNo)); continue; } @@ -240,12 +237,12 @@ struct OperandsSignature { return false; } - assert(Op->hasTypeSet(0) && "Type infererence not done?"); + assert(Op->hasConcreteType(0) && "Type infererence not done?"); // For now, all the operands must have the same type (if they aren't // immediates). Note that this causes us to reject variable sized shifts // on X86. - if (Op->getType(0) != VT) + if (Op->getSimpleType(0) != VT) return false; DefInit *OpDI = dyn_cast<DefInit>(Op->getLeafValue()); @@ -366,7 +363,7 @@ struct OperandsSignature { namespace { class FastISelMap { - // A multimap is needed instead of a "plain" map because the key is + // A multimap is needed instead of a "plain" map because the key is // the instruction's complexity (an int) and they are not unique. typedef std::multimap<int, InstructionMemo> PredMap; typedef std::map<MVT::SimpleValueType, PredMap> RetPredMap; @@ -377,7 +374,7 @@ class FastISelMap { OperandsOpcodeTypeRetPredMap SimplePatterns; - // This is used to check that there are no duplicate predicates + // This is used to check that there are no duplicate predicates typedef std::multimap<std::string, bool> PredCheckMap; typedef std::map<MVT::SimpleValueType, PredCheckMap> RetPredCheckMap; typedef std::map<MVT::SimpleValueType, RetPredCheckMap> TypeRetPredCheckMap; @@ -398,10 +395,10 @@ public: void collectPatterns(CodeGenDAGPatterns &CGP); void printImmediatePredicates(raw_ostream &OS); void printFunctionDefinitions(raw_ostream &OS); -private: - void emitInstructionCode(raw_ostream &OS, +private: + void emitInstructionCode(raw_ostream &OS, const OperandsSignature &Operands, - const PredMap &PM, + const PredMap &PM, const std::string &RetVTName); }; } // End anonymous namespace @@ -506,11 +503,11 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) { Record *InstPatOp = InstPatNode->getOperator(); std::string OpcodeName = getOpcodeName(InstPatOp, CGP); MVT::SimpleValueType RetVT = MVT::isVoid; - if (InstPatNode->getNumTypes()) RetVT = InstPatNode->getType(0); + if (InstPatNode->getNumTypes()) RetVT = InstPatNode->getSimpleType(0); MVT::SimpleValueType VT = RetVT; if (InstPatNode->getNumChildren()) { assert(InstPatNode->getChild(0)->getNumTypes() == 1); - VT = InstPatNode->getChild(0)->getType(0); + VT = InstPatNode->getChild(0)->getSimpleType(0); } // For now, filter out any instructions with predicates. 
@@ -575,7 +572,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) { PhysRegInputs, PredicateCheck }; - + int complexity = Pattern.getPatternComplexity(CGP); if (SimplePatternsCheck[Operands][OpcodeName][VT] @@ -615,9 +612,9 @@ void FastISelMap::printImmediatePredicates(raw_ostream &OS) { OS << "\n\n"; } -void FastISelMap::emitInstructionCode(raw_ostream &OS, +void FastISelMap::emitInstructionCode(raw_ostream &OS, const OperandsSignature &Operands, - const PredMap &PM, + const PredMap &PM, const std::string &RetVTName) { // Emit code for each possible instruction. There may be // multiple if there are subtarget concerns. A reverse iterator diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp index cafcbeb57de5..b80f02355062 100644 --- a/utils/TableGen/GlobalISelEmitter.cpp +++ b/utils/TableGen/GlobalISelEmitter.cpp @@ -36,6 +36,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/Support/CodeGenCoverage.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/LowLevelTypeImpl.h" @@ -43,8 +44,8 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include <string> #include <numeric> +#include <string> using namespace llvm; #define DEBUG_TYPE "gisel-emitter" @@ -52,9 +53,8 @@ using namespace llvm; STATISTIC(NumPatternTotal, "Total number of patterns"); STATISTIC(NumPatternImported, "Number of patterns imported from SelectionDAG"); STATISTIC(NumPatternImportsSkipped, "Number of SelectionDAG imports skipped"); +STATISTIC(NumPatternsTested, "Number of patterns executed according to coverage information"); STATISTIC(NumPatternEmitted, "Number of patterns emitted"); -/// A unique identifier for a MatchTable. -static unsigned CurrentMatchTableID = 0; cl::OptionCategory GlobalISelEmitterCat("Options for -gen-global-isel"); @@ -64,9 +64,35 @@ static cl::opt<bool> WarnOnSkippedPatterns( "in the GlobalISel selector"), cl::init(false), cl::cat(GlobalISelEmitterCat)); +static cl::opt<bool> GenerateCoverage( + "instrument-gisel-coverage", + cl::desc("Generate coverage instrumentation for GlobalISel"), + cl::init(false), cl::cat(GlobalISelEmitterCat)); + +static cl::opt<std::string> UseCoverageFile( + "gisel-coverage-file", cl::init(""), + cl::desc("Specify file to retrieve coverage information from"), + cl::cat(GlobalISelEmitterCat)); + +static cl::opt<bool> OptimizeMatchTable( + "optimize-match-table", + cl::desc("Generate an optimized version of the match table"), + cl::init(true), cl::cat(GlobalISelEmitterCat)); + namespace { //===- Helper functions ---------------------------------------------------===// +/// Get the name of the enum value used to number the predicate function. +std::string getEnumNameForPredicate(const TreePredicateFn &Predicate) { + return "GIPFP_" + Predicate.getImmTypeIdentifier().str() + "_" + + Predicate.getFnName(); +} + +/// Get the opcode used to check this predicate. +std::string getMatchOpcodeForPredicate(const TreePredicateFn &Predicate) { + return "GIM_Check" + Predicate.getImmTypeIdentifier().str() + "ImmPredicate"; +} + /// This class stands in for LLT wherever we want to tablegen-erate an /// equivalent at compiler run-time. 
class LLTCodeGen { @@ -76,6 +102,14 @@ private: public: LLTCodeGen(const LLT &Ty) : Ty(Ty) {} + std::string getCxxEnumValue() const { + std::string Str; + raw_string_ostream OS(Str); + + emitCxxEnumValue(OS); + return OS.str(); + } + void emitCxxEnumValue(raw_ostream &OS) const { if (Ty.isScalar()) { OS << "GILLT_s" << Ty.getSizeInBits(); @@ -85,6 +119,12 @@ public: OS << "GILLT_v" << Ty.getNumElements() << "s" << Ty.getScalarSizeInBits(); return; } + if (Ty.isPointer()) { + OS << "GILLT_p" << Ty.getAddressSpace(); + if (Ty.getSizeInBits() > 0) + OS << "s" << Ty.getSizeInBits(); + return; + } llvm_unreachable("Unhandled LLT"); } @@ -98,37 +138,42 @@ public: << Ty.getScalarSizeInBits() << ")"; return; } + if (Ty.isPointer() && Ty.getSizeInBits() > 0) { + OS << "LLT::pointer(" << Ty.getAddressSpace() << ", " + << Ty.getSizeInBits() << ")"; + return; + } llvm_unreachable("Unhandled LLT"); } const LLT &get() const { return Ty; } /// This ordering is used for std::unique() and std::sort(). There's no - /// particular logic behind the order. + /// particular logic behind the order but either A < B or B < A must be + /// true if A != B. bool operator<(const LLTCodeGen &Other) const { + if (Ty.isValid() != Other.Ty.isValid()) + return Ty.isValid() < Other.Ty.isValid(); if (!Ty.isValid()) - return Other.Ty.isValid(); - if (Ty.isScalar()) { - if (!Other.Ty.isValid()) - return false; - if (Other.Ty.isScalar()) - return Ty.getSizeInBits() < Other.Ty.getSizeInBits(); return false; - } - if (Ty.isVector()) { - if (!Other.Ty.isValid() || Other.Ty.isScalar()) - return false; - if (Other.Ty.isVector()) { - if (Ty.getNumElements() < Other.Ty.getNumElements()) - return true; - if (Ty.getNumElements() > Other.Ty.getNumElements()) - return false; - return Ty.getSizeInBits() < Other.Ty.getSizeInBits(); - } - return false; - } - llvm_unreachable("Unhandled LLT"); + + if (Ty.isVector() != Other.Ty.isVector()) + return Ty.isVector() < Other.Ty.isVector(); + if (Ty.isScalar() != Other.Ty.isScalar()) + return Ty.isScalar() < Other.Ty.isScalar(); + if (Ty.isPointer() != Other.Ty.isPointer()) + return Ty.isPointer() < Other.Ty.isPointer(); + + if (Ty.isPointer() && Ty.getAddressSpace() != Other.Ty.getAddressSpace()) + return Ty.getAddressSpace() < Other.Ty.getAddressSpace(); + + if (Ty.isVector() && Ty.getNumElements() != Other.Ty.getNumElements()) + return Ty.getNumElements() < Other.Ty.getNumElements(); + + return Ty.getSizeInBits() < Other.Ty.getSizeInBits(); } + + bool operator==(const LLTCodeGen &B) const { return Ty == B.Ty; } }; class InstructionMatcher; @@ -136,9 +181,11 @@ class InstructionMatcher; /// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...). 
static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) { MVT VT(SVT); + if (VT.isVector() && VT.getVectorNumElements() != 1) return LLTCodeGen( LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits())); + if (VT.isInteger() || VT.isFloatingPoint()) return LLTCodeGen(LLT::scalar(VT.getSizeInBits())); return None; @@ -150,10 +197,53 @@ static std::string explainPredicates(const TreePatternNode *N) { for (const auto &P : N->getPredicateFns()) { Explanation += (Separator + P.getOrigPatFragRecord()->getRecord()->getName()).str(); + Separator = ", "; + if (P.isAlwaysTrue()) Explanation += " always-true"; if (P.isImmediatePattern()) Explanation += " immediate"; + + if (P.isUnindexed()) + Explanation += " unindexed"; + + if (P.isNonExtLoad()) + Explanation += " non-extload"; + if (P.isAnyExtLoad()) + Explanation += " extload"; + if (P.isSignExtLoad()) + Explanation += " sextload"; + if (P.isZeroExtLoad()) + Explanation += " zextload"; + + if (P.isNonTruncStore()) + Explanation += " non-truncstore"; + if (P.isTruncStore()) + Explanation += " truncstore"; + + if (Record *VT = P.getMemoryVT()) + Explanation += (" MemVT=" + VT->getName()).str(); + if (Record *VT = P.getScalarMemoryVT()) + Explanation += (" ScalarVT(MemVT)=" + VT->getName()).str(); + + if (P.isAtomicOrderingMonotonic()) + Explanation += " monotonic"; + if (P.isAtomicOrderingAcquire()) + Explanation += " acquire"; + if (P.isAtomicOrderingRelease()) + Explanation += " release"; + if (P.isAtomicOrderingAcquireRelease()) + Explanation += " acq_rel"; + if (P.isAtomicOrderingSequentiallyConsistent()) + Explanation += " seq_cst"; + if (P.isAtomicOrderingAcquireOrStronger()) + Explanation += " >=acquire"; + if (P.isAtomicOrderingWeakerThanAcquire()) + Explanation += " <acquire"; + if (P.isAtomicOrderingReleaseOrStronger()) + Explanation += " >=release"; + if (P.isAtomicOrderingWeakerThanRelease()) + Explanation += " <release"; } return Explanation; } @@ -165,7 +255,12 @@ std::string explainOperator(Record *Operator) { if (Operator->isSubClassOf("Intrinsic")) return (" (Operator is an Intrinsic, " + Operator->getName() + ")").str(); - return " (Operator not understood)"; + if (Operator->isSubClassOf("ComplexPattern")) + return (" (Operator is an unmapped ComplexPattern, " + Operator->getName() + + ")") + .str(); + + return (" (Operator " + Operator->getName() + " not understood)").str(); } /// Helper function to let the emitter report skip reason error messages. 
@@ -176,17 +271,48 @@ static Error failedImport(const Twine &Reason) { static Error isTrivialOperatorNode(const TreePatternNode *N) { std::string Explanation = ""; std::string Separator = ""; - if (N->isLeaf()) { - if (isa<IntInit>(N->getLeafValue())) - return Error::success(); - Explanation = "Is a leaf"; - Separator = ", "; - } + bool HasUnsupportedPredicate = false; + for (const auto &Predicate : N->getPredicateFns()) { + if (Predicate.isAlwaysTrue()) + continue; - if (N->hasAnyPredicate()) { + if (Predicate.isImmediatePattern()) + continue; + + if (Predicate.isNonExtLoad()) + continue; + + if (Predicate.isNonTruncStore()) + continue; + + if (Predicate.isLoad() || Predicate.isStore()) { + if (Predicate.isUnindexed()) + continue; + } + + if (Predicate.isAtomic() && Predicate.getMemoryVT()) + continue; + + if (Predicate.isAtomic() && + (Predicate.isAtomicOrderingMonotonic() || + Predicate.isAtomicOrderingAcquire() || + Predicate.isAtomicOrderingRelease() || + Predicate.isAtomicOrderingAcquireRelease() || + Predicate.isAtomicOrderingSequentiallyConsistent() || + Predicate.isAtomicOrderingAcquireOrStronger() || + Predicate.isAtomicOrderingWeakerThanAcquire() || + Predicate.isAtomicOrderingReleaseOrStronger() || + Predicate.isAtomicOrderingWeakerThanRelease())) + continue; + + HasUnsupportedPredicate = true; Explanation = Separator + "Has a predicate (" + explainPredicates(N) + ")"; Separator = ", "; + Explanation += (Separator + "first-failing:" + + Predicate.getOrigPatFragRecord()->getRecord()->getName()) + .str(); + break; } if (N->getTransformFn()) { @@ -194,7 +320,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { Separator = ", "; } - if (!N->isLeaf() && !N->hasAnyPredicate() && !N->getTransformFn()) + if (!HasUnsupportedPredicate && !N->getTransformFn()) return Error::success(); return failedImport(Explanation); @@ -217,13 +343,267 @@ getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) { Name += ("_" + Feature->getName()).str(); return Name; } + +//===- MatchTable Helpers -------------------------------------------------===// + +class MatchTable; + +/// A record to be stored in a MatchTable. +/// +/// This class represents any and all output that may be required to emit the +/// MatchTable. Instances are most often configured to represent an opcode or +/// value that will be emitted to the table with some formatting but it can also +/// represent commas, comments, and other formatting instructions. +struct MatchTableRecord { + enum RecordFlagsBits { + MTRF_None = 0x0, + /// Causes EmitStr to be formatted as comment when emitted. + MTRF_Comment = 0x1, + /// Causes the record value to be followed by a comma when emitted. + MTRF_CommaFollows = 0x2, + /// Causes the record value to be followed by a line break when emitted. + MTRF_LineBreakFollows = 0x4, + /// Indicates that the record defines a label and causes an additional + /// comment to be emitted containing the index of the label. + MTRF_Label = 0x8, + /// Causes the record to be emitted as the index of the label specified by + /// LabelID along with a comment indicating where that label is. + MTRF_JumpTarget = 0x10, + /// Causes the formatter to add a level of indentation before emitting the + /// record. + MTRF_Indent = 0x20, + /// Causes the formatter to remove a level of indentation after emitting the + /// record. + MTRF_Outdent = 0x40, + }; + + /// When MTRF_Label or MTRF_JumpTarget is used, indicates a label id to + /// reference or define. + unsigned LabelID; + /// The string to emit. 
Depending on the MTRF_* flags it may be a comment, a + /// value, a label name. + std::string EmitStr; + +private: + /// The number of MatchTable elements described by this record. Comments are 0 + /// while values are typically 1. Values >1 may occur when we need to emit + /// values that exceed the size of a MatchTable element. + unsigned NumElements; + +public: + /// A bitfield of RecordFlagsBits flags. + unsigned Flags; + + MatchTableRecord(Optional<unsigned> LabelID_, StringRef EmitStr, + unsigned NumElements, unsigned Flags) + : LabelID(LabelID_.hasValue() ? LabelID_.getValue() : ~0u), + EmitStr(EmitStr), NumElements(NumElements), Flags(Flags) { + assert((!LabelID_.hasValue() || LabelID != ~0u) && + "This value is reserved for non-labels"); + } + + void emit(raw_ostream &OS, bool LineBreakNextAfterThis, + const MatchTable &Table) const; + unsigned size() const { return NumElements; } +}; + +/// Holds the contents of a generated MatchTable to enable formatting and the +/// necessary index tracking needed to support GIM_Try. +class MatchTable { + /// An unique identifier for the table. The generated table will be named + /// MatchTable${ID}. + unsigned ID; + /// The records that make up the table. Also includes comments describing the + /// values being emitted and line breaks to format it. + std::vector<MatchTableRecord> Contents; + /// The currently defined labels. + DenseMap<unsigned, unsigned> LabelMap; + /// Tracks the sum of MatchTableRecord::NumElements as the table is built. + unsigned CurrentSize; + + /// A unique identifier for a MatchTable label. + static unsigned CurrentLabelID; + +public: + static MatchTableRecord LineBreak; + static MatchTableRecord Comment(StringRef Comment) { + return MatchTableRecord(None, Comment, 0, MatchTableRecord::MTRF_Comment); + } + static MatchTableRecord Opcode(StringRef Opcode, int IndentAdjust = 0) { + unsigned ExtraFlags = 0; + if (IndentAdjust > 0) + ExtraFlags |= MatchTableRecord::MTRF_Indent; + if (IndentAdjust < 0) + ExtraFlags |= MatchTableRecord::MTRF_Outdent; + + return MatchTableRecord(None, Opcode, 1, + MatchTableRecord::MTRF_CommaFollows | ExtraFlags); + } + static MatchTableRecord NamedValue(StringRef NamedValue) { + return MatchTableRecord(None, NamedValue, 1, + MatchTableRecord::MTRF_CommaFollows); + } + static MatchTableRecord NamedValue(StringRef Namespace, + StringRef NamedValue) { + return MatchTableRecord(None, (Namespace + "::" + NamedValue).str(), 1, + MatchTableRecord::MTRF_CommaFollows); + } + static MatchTableRecord IntValue(int64_t IntValue) { + return MatchTableRecord(None, llvm::to_string(IntValue), 1, + MatchTableRecord::MTRF_CommaFollows); + } + static MatchTableRecord Label(unsigned LabelID) { + return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 0, + MatchTableRecord::MTRF_Label | + MatchTableRecord::MTRF_Comment | + MatchTableRecord::MTRF_LineBreakFollows); + } + static MatchTableRecord JumpTarget(unsigned LabelID) { + return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 1, + MatchTableRecord::MTRF_JumpTarget | + MatchTableRecord::MTRF_Comment | + MatchTableRecord::MTRF_CommaFollows); + } + + MatchTable(unsigned ID) : ID(ID), CurrentSize(0) {} + + void push_back(const MatchTableRecord &Value) { + if (Value.Flags & MatchTableRecord::MTRF_Label) + defineLabel(Value.LabelID); + Contents.push_back(Value); + CurrentSize += Value.size(); + } + + unsigned allocateLabelID() const { return CurrentLabelID++; } + + void defineLabel(unsigned LabelID) { + 
LabelMap.insert(std::make_pair(LabelID, CurrentSize)); + } + + unsigned getLabelIndex(unsigned LabelID) const { + const auto I = LabelMap.find(LabelID); + assert(I != LabelMap.end() && "Use of undeclared label"); + return I->second; + } + + void emitUse(raw_ostream &OS) const { OS << "MatchTable" << ID; } + + void emitDeclaration(raw_ostream &OS) const { + unsigned Indentation = 4; + OS << " constexpr static int64_t MatchTable" << ID << "[] = {"; + LineBreak.emit(OS, true, *this); + OS << std::string(Indentation, ' '); + + for (auto I = Contents.begin(), E = Contents.end(); I != E; + ++I) { + bool LineBreakIsNext = false; + const auto &NextI = std::next(I); + + if (NextI != E) { + if (NextI->EmitStr == "" && + NextI->Flags == MatchTableRecord::MTRF_LineBreakFollows) + LineBreakIsNext = true; + } + + if (I->Flags & MatchTableRecord::MTRF_Indent) + Indentation += 2; + + I->emit(OS, LineBreakIsNext, *this); + if (I->Flags & MatchTableRecord::MTRF_LineBreakFollows) + OS << std::string(Indentation, ' '); + + if (I->Flags & MatchTableRecord::MTRF_Outdent) + Indentation -= 2; + } + OS << "};\n"; + } +}; + +unsigned MatchTable::CurrentLabelID = 0; + +MatchTableRecord MatchTable::LineBreak = { + None, "" /* Emit String */, 0 /* Elements */, + MatchTableRecord::MTRF_LineBreakFollows}; + +void MatchTableRecord::emit(raw_ostream &OS, bool LineBreakIsNextAfterThis, + const MatchTable &Table) const { + bool UseLineComment = + LineBreakIsNextAfterThis | (Flags & MTRF_LineBreakFollows); + if (Flags & (MTRF_JumpTarget | MTRF_CommaFollows)) + UseLineComment = false; + + if (Flags & MTRF_Comment) + OS << (UseLineComment ? "// " : "/*"); + + OS << EmitStr; + if (Flags & MTRF_Label) + OS << ": @" << Table.getLabelIndex(LabelID); + + if (Flags & MTRF_Comment && !UseLineComment) + OS << "*/"; + + if (Flags & MTRF_JumpTarget) { + if (Flags & MTRF_Comment) + OS << " "; + OS << Table.getLabelIndex(LabelID); + } + + if (Flags & MTRF_CommaFollows) { + OS << ","; + if (!LineBreakIsNextAfterThis && !(Flags & MTRF_LineBreakFollows)) + OS << " "; + } + + if (Flags & MTRF_LineBreakFollows) + OS << "\n"; +} + +MatchTable &operator<<(MatchTable &Table, const MatchTableRecord &Value) { + Table.push_back(Value); + return Table; +} + //===- Matchers -----------------------------------------------------------===// class OperandMatcher; class MatchAction; +class PredicateMatcher; +class RuleMatcher; + +class Matcher { +public: + virtual ~Matcher() = default; + virtual void emit(MatchTable &Table) = 0; +}; + +class GroupMatcher : public Matcher { + SmallVector<std::unique_ptr<PredicateMatcher>, 8> Conditions; + SmallVector<Matcher *, 8> Rules; + +public: + void addCondition(std::unique_ptr<PredicateMatcher> &&Predicate) { + Conditions.emplace_back(std::move(Predicate)); + } + void addRule(Matcher &Rule) { Rules.push_back(&Rule); } + const std::unique_ptr<PredicateMatcher> &conditions_back() const { + return Conditions.back(); + } + bool lastConditionMatches(const PredicateMatcher &Predicate) const; + bool conditions_empty() const { return Conditions.empty(); } + void clear() { + Conditions.clear(); + Rules.clear(); + } + void emit(MatchTable &Table) override; +}; /// Generates code to check that a match rule matches. -class RuleMatcher { +class RuleMatcher : public Matcher { +public: + using ActionVec = std::vector<std::unique_ptr<MatchAction>>; + using action_iterator = ActionVec::iterator; + +protected: /// A list of matchers that all need to succeed for the current rule to match. 
/// FIXME: This currently supports a single match position but could be /// extended to support multiple positions to support div/rem fusion or @@ -232,40 +612,133 @@ class RuleMatcher { /// A list of actions that need to be taken when all predicates in this rule /// have succeeded. - std::vector<std::unique_ptr<MatchAction>> Actions; + ActionVec Actions; + + using DefinedInsnVariablesMap = + std::map<const InstructionMatcher *, unsigned>; /// A map of instruction matchers to the local variables created by /// emitCaptureOpcodes(). - std::map<const InstructionMatcher *, unsigned> InsnVariableIDs; + DefinedInsnVariablesMap InsnVariableIDs; + + using MutatableInsnSet = SmallPtrSet<const InstructionMatcher *, 4>; + + // The set of instruction matchers that have not yet been claimed for mutation + // by a BuildMI. + MutatableInsnSet MutatableInsns; + + /// A map of named operands defined by the matchers that may be referenced by + /// the renderers. + StringMap<OperandMatcher *> DefinedOperands; /// ID for the next instruction variable defined with defineInsnVar() unsigned NextInsnVarID; + /// ID for the next output instruction allocated with allocateOutputInsnID() + unsigned NextOutputInsnID; + + /// ID for the next temporary register ID allocated with allocateTempRegID() + unsigned NextTempRegID; + std::vector<Record *> RequiredFeatures; + ArrayRef<SMLoc> SrcLoc; + + typedef std::tuple<Record *, unsigned, unsigned> + DefinedComplexPatternSubOperand; + typedef StringMap<DefinedComplexPatternSubOperand> + DefinedComplexPatternSubOperandMap; + /// A map of Symbolic Names to ComplexPattern sub-operands. + DefinedComplexPatternSubOperandMap ComplexSubOperands; + + uint64_t RuleID; + static uint64_t NextRuleID; + public: - RuleMatcher() - : Matchers(), Actions(), InsnVariableIDs(), NextInsnVarID(0) {} + RuleMatcher(ArrayRef<SMLoc> SrcLoc) + : Matchers(), Actions(), InsnVariableIDs(), MutatableInsns(), + DefinedOperands(), NextInsnVarID(0), NextOutputInsnID(0), + NextTempRegID(0), SrcLoc(SrcLoc), ComplexSubOperands(), + RuleID(NextRuleID++) {} RuleMatcher(RuleMatcher &&Other) = default; RuleMatcher &operator=(RuleMatcher &&Other) = default; - InstructionMatcher &addInstructionMatcher(); + uint64_t getRuleID() const { return RuleID; } + + InstructionMatcher &addInstructionMatcher(StringRef SymbolicName); void addRequiredFeature(Record *Feature); const std::vector<Record *> &getRequiredFeatures() const; template <class Kind, class... Args> Kind &addAction(Args &&... args); + template <class Kind, class... Args> + action_iterator insertAction(action_iterator InsertPt, Args &&... args); /// Define an instruction without emitting any code to do so. /// This is used for the root of the match. unsigned implicitlyDefineInsnVar(const InstructionMatcher &Matcher); + void clearImplicitMap() { + NextInsnVarID = 0; + InsnVariableIDs.clear(); + }; /// Define an instruction and emit corresponding state-machine opcodes. 
- unsigned defineInsnVar(raw_ostream &OS, const InstructionMatcher &Matcher, + unsigned defineInsnVar(MatchTable &Table, const InstructionMatcher &Matcher, unsigned InsnVarID, unsigned OpIdx); unsigned getInsnVarID(const InstructionMatcher &InsnMatcher) const; + DefinedInsnVariablesMap::const_iterator defined_insn_vars_begin() const { + return InsnVariableIDs.begin(); + } + DefinedInsnVariablesMap::const_iterator defined_insn_vars_end() const { + return InsnVariableIDs.end(); + } + iterator_range<typename DefinedInsnVariablesMap::const_iterator> + defined_insn_vars() const { + return make_range(defined_insn_vars_begin(), defined_insn_vars_end()); + } + + MutatableInsnSet::const_iterator mutatable_insns_begin() const { + return MutatableInsns.begin(); + } + MutatableInsnSet::const_iterator mutatable_insns_end() const { + return MutatableInsns.end(); + } + iterator_range<typename MutatableInsnSet::const_iterator> + mutatable_insns() const { + return make_range(mutatable_insns_begin(), mutatable_insns_end()); + } + void reserveInsnMatcherForMutation(const InstructionMatcher *InsnMatcher) { + bool R = MutatableInsns.erase(InsnMatcher); + assert(R && "Reserving a mutatable insn that isn't available"); + (void)R; + } + + action_iterator actions_begin() { return Actions.begin(); } + action_iterator actions_end() { return Actions.end(); } + iterator_range<action_iterator> actions() { + return make_range(actions_begin(), actions_end()); + } - void emitCaptureOpcodes(raw_ostream &OS); + void defineOperand(StringRef SymbolicName, OperandMatcher &OM); - void emit(raw_ostream &OS); + void defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern, + unsigned RendererID, unsigned SubOperandID) { + assert(ComplexSubOperands.count(SymbolicName) == 0 && "Already defined"); + ComplexSubOperands[SymbolicName] = + std::make_tuple(ComplexPattern, RendererID, SubOperandID); + } + Optional<DefinedComplexPatternSubOperand> + getComplexSubOperand(StringRef SymbolicName) const { + const auto &I = ComplexSubOperands.find(SymbolicName); + if (I == ComplexSubOperands.end()) + return None; + return I->second; + } + + const InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const; + const OperandMatcher &getOperandMatcher(StringRef Name) const; + + void emitCaptureOpcodes(MatchTable &Table); + + void emit(MatchTable &Table) override; /// Compare the priority of this object and B. /// @@ -276,22 +749,38 @@ public: /// matcher. unsigned countRendererFns() const; + std::unique_ptr<PredicateMatcher> forgetFirstCondition(); + // FIXME: Remove this as soon as possible - InstructionMatcher &insnmatcher_front() const { return *Matchers.front(); } + InstructionMatcher &insnmatchers_front() const { return *Matchers.front(); } + + unsigned allocateOutputInsnID() { return NextOutputInsnID++; } + unsigned allocateTempRegID() { return NextTempRegID++; } + + bool insnmatchers_empty() const { return Matchers.empty(); } + void insnmatchers_pop_front() { Matchers.erase(Matchers.begin()); } }; +uint64_t RuleMatcher::NextRuleID = 0; + +using action_iterator = RuleMatcher::action_iterator; + template <class PredicateTy> class PredicateListMatcher { private: typedef std::vector<std::unique_ptr<PredicateTy>> PredicateVec; PredicateVec Predicates; + /// Template instantiations should specialize this to return a string to use + /// for the comment emitted when there are no predicates. + std::string getNoPredicateComment() const; + public: /// Construct a new operand predicate and add it to the matcher. 
template <class Kind, class... Args> - Kind &addPredicate(Args&&... args) { + Optional<Kind *> addPredicate(Args&&... args) { Predicates.emplace_back( llvm::make_unique<Kind>(std::forward<Args>(args)...)); - return *static_cast<Kind *>(Predicates.back().get()); + return static_cast<Kind *>(Predicates.back().get()); } typename PredicateVec::const_iterator predicates_begin() const { @@ -306,27 +795,34 @@ public: typename PredicateVec::size_type predicates_size() const { return Predicates.size(); } + bool predicates_empty() const { return Predicates.empty(); } + + std::unique_ptr<PredicateTy> predicates_pop_front() { + std::unique_ptr<PredicateTy> Front = std::move(Predicates.front()); + Predicates.erase(Predicates.begin()); + return Front; + } /// Emit MatchTable opcodes that tests whether all the predicates are met. template <class... Args> - void emitPredicateListOpcodes(raw_ostream &OS, Args &&... args) const { + void emitPredicateListOpcodes(MatchTable &Table, Args &&... args) const { if (Predicates.empty()) { - OS << "// No predicates\n"; + Table << MatchTable::Comment(getNoPredicateComment()) + << MatchTable::LineBreak; return; } - for (const auto &Predicate : predicates()) - Predicate->emitPredicateOpcodes(OS, std::forward<Args>(args)...); + unsigned OpIdx = (*predicates_begin())->getOpIdx(); + (void)OpIdx; + for (const auto &Predicate : predicates()) { + assert(Predicate->getOpIdx() == OpIdx && + "Checks touch different operands?"); + Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...); + } } }; -/// Generates code to check a predicate of an operand. -/// -/// Typical predicates include: -/// * Operand is a particular register. -/// * Operand is assigned a particular register bank. -/// * Operand is an MBB. -class OperandPredicateMatcher { +class PredicateMatcher { public: /// This enum is used for RTTI and also defines the priority that is given to /// the predicate when generating the matcher code. Kinds with higher priority @@ -335,80 +831,172 @@ public: /// The relative priority of OPM_LLT, OPM_RegBank, and OPM_MBB do not matter /// but OPM_Int must have priority over OPM_RegBank since constant integers /// are represented by a virtual register defined by a G_CONSTANT instruction. + /// + /// Note: The relative priority between IPM_ and OPM_ does not matter, they + /// are currently not compared between each other. enum PredicateKind { + IPM_Opcode, + IPM_ImmPredicate, + IPM_AtomicOrderingMMO, + OPM_SameOperand, OPM_ComplexPattern, - OPM_Instruction, OPM_IntrinsicID, + OPM_Instruction, OPM_Int, OPM_LiteralInt, OPM_LLT, + OPM_PointerToAny, OPM_RegBank, OPM_MBB, }; protected: PredicateKind Kind; + unsigned InsnVarID; + unsigned OpIdx; public: - OperandPredicateMatcher(PredicateKind Kind) : Kind(Kind) {} - virtual ~OperandPredicateMatcher() {} + PredicateMatcher(PredicateKind Kind, unsigned InsnVarID, unsigned OpIdx = ~0) + : Kind(Kind), InsnVarID(InsnVarID), OpIdx(OpIdx) {} + + unsigned getOpIdx() const { return OpIdx; } + virtual ~PredicateMatcher() = default; + /// Emit MatchTable opcodes that check the predicate for the given operand. + virtual void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const = 0; PredicateKind getKind() const { return Kind; } - /// Return the OperandMatcher for the specified operand or nullptr if there - /// isn't one by that name in this operand predicate matcher. - /// - /// InstructionOperandMatcher is the only subclass that can return non-null - /// for this. 
- virtual Optional<const OperandMatcher *> - getOptionalOperand(StringRef SymbolicName) const { - assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); - return None; + virtual bool isIdentical(const PredicateMatcher &B) const { + if (InsnVarID != 0 || OpIdx != (unsigned)~0) { + // We currently don't hoist the record of instruction properly. + // Therefore we can only work on the orig instruction (InsnVarID + // == 0). + DEBUG(dbgs() << "Non-zero instr ID not supported yet\n"); + return false; + } + return B.getKind() == getKind() && InsnVarID == B.InsnVarID && + OpIdx == B.OpIdx; } +}; + +/// Generates code to check a predicate of an operand. +/// +/// Typical predicates include: +/// * Operand is a particular register. +/// * Operand is assigned a particular register bank. +/// * Operand is an MBB. +class OperandPredicateMatcher : public PredicateMatcher { +public: + OperandPredicateMatcher(PredicateKind Kind, unsigned InsnVarID, + unsigned OpIdx) + : PredicateMatcher(Kind, InsnVarID, OpIdx) {} + virtual ~OperandPredicateMatcher() {} /// Emit MatchTable opcodes to capture instructions into the MIs table. /// /// Only InstructionOperandMatcher needs to do anything for this method the /// rest just walk the tree. - virtual void emitCaptureOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const {} - - /// Emit MatchTable opcodes that check the predicate for the given operand. - virtual void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, - unsigned OpIdx) const = 0; + virtual void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const {} /// Compare the priority of this object and B. /// /// Returns true if this object is more important than B. - virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const { - return Kind < B.Kind; - }; + virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const; /// Report the maximum number of temporary operands needed by the predicate /// matcher. virtual unsigned countRendererFns() const { return 0; } }; +template <> +std::string +PredicateListMatcher<OperandPredicateMatcher>::getNoPredicateComment() const { + return "No operand predicates"; +} + +/// Generates code to check that a register operand is defined by the same exact +/// one as another. +class SameOperandMatcher : public OperandPredicateMatcher { + std::string MatchingName; + +public: + SameOperandMatcher(StringRef MatchingName, unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx), + MatchingName(MatchingName) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_SameOperand; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override; +}; + /// Generates code to check that an operand is a particular LLT. 
class LLTOperandMatcher : public OperandPredicateMatcher { protected: LLTCodeGen Ty; public: - LLTOperandMatcher(const LLTCodeGen &Ty) - : OperandPredicateMatcher(OPM_LLT), Ty(Ty) {} + static std::set<LLTCodeGen> KnownTypes; - static bool classof(const OperandPredicateMatcher *P) { + LLTOperandMatcher(const LLTCodeGen &Ty, unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_LLT, InsnVarID, OpIdx), Ty(Ty) { + KnownTypes.insert(Ty); + } + + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_LLT; } + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + Ty == cast<LLTOperandMatcher>(&B)->Ty; + } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckType, /*MI*/" << InsnVarID << ", /*Op*/" << OpIdx - << ", /*Type*/"; - Ty.emitCxxEnumValue(OS); - OS << ", \n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckType") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::Comment("Type") + << MatchTable::NamedValue(Ty.getCxxEnumValue()) + << MatchTable::LineBreak; + } +}; + +std::set<LLTCodeGen> LLTOperandMatcher::KnownTypes; + +/// Generates code to check that an operand is a pointer to any address space. +/// +/// In SelectionDAG, the types did not describe pointers or address spaces. As a +/// result, iN is used to describe a pointer of N bits to any address space and +/// PatFrag predicates are typically used to constrain the address space. There's +/// no reliable means to derive the missing type information from the pattern so +/// imported rules must test the components of a pointer separately. +/// +/// If SizeInBits is zero, then the pointer size will be obtained from the +/// subtarget. 
+class PointerToAnyOperandMatcher : public OperandPredicateMatcher { +protected: + unsigned SizeInBits; + +public: + PointerToAnyOperandMatcher(unsigned SizeInBits, unsigned InsnVarID, + unsigned OpIdx) + : OperandPredicateMatcher(OPM_PointerToAny, InsnVarID, OpIdx), + SizeInBits(SizeInBits) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_PointerToAny; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckPointerToAny") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("SizeInBits") + << MatchTable::IntValue(SizeInBits) << MatchTable::LineBreak; } }; @@ -421,21 +1009,27 @@ protected: unsigned getAllocatedTemporariesBaseID() const; public: + bool isIdentical(const PredicateMatcher &B) const override { return false; } + ComplexPatternOperandMatcher(const OperandMatcher &Operand, - const Record &TheDef) - : OperandPredicateMatcher(OPM_ComplexPattern), Operand(Operand), - TheDef(TheDef) {} + const Record &TheDef, unsigned InsnVarID, + unsigned OpIdx) + : OperandPredicateMatcher(OPM_ComplexPattern, InsnVarID, OpIdx), + Operand(Operand), TheDef(TheDef) {} - static bool classof(const OperandPredicateMatcher *P) { + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_ComplexPattern; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { unsigned ID = getAllocatedTemporariesBaseID(); - OS << " GIM_CheckComplexPattern, /*MI*/" << InsnVarID << ", /*Op*/" - << OpIdx << ", /*Renderer*/" << ID << ", GICP_" - << TheDef.getName() << ",\n"; + Table << MatchTable::Opcode("GIM_CheckComplexPattern") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("Renderer") << MatchTable::IntValue(ID) + << MatchTable::NamedValue(("GICP_" + TheDef.getName()).str()) + << MatchTable::LineBreak; } unsigned countRendererFns() const override { @@ -449,32 +1043,45 @@ protected: const CodeGenRegisterClass &RC; public: - RegisterBankOperandMatcher(const CodeGenRegisterClass &RC) - : OperandPredicateMatcher(OPM_RegBank), RC(RC) {} + RegisterBankOperandMatcher(const CodeGenRegisterClass &RC, unsigned InsnVarID, + unsigned OpIdx) + : OperandPredicateMatcher(OPM_RegBank, InsnVarID, OpIdx), RC(RC) {} - static bool classof(const OperandPredicateMatcher *P) { + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + RC.getDef() == cast<RegisterBankOperandMatcher>(&B)->RC.getDef(); + } + + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_RegBank; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckRegBankForClass, /*MI*/" << InsnVarID << ", /*Op*/" - << OpIdx << ", /*RC*/" << RC.getQualifiedName() << "RegClassID,\n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckRegBankForClass") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("RC") + << MatchTable::NamedValue(RC.getQualifiedName() + 
"RegClassID") + << MatchTable::LineBreak; } }; /// Generates code to check that an operand is a basic block. class MBBOperandMatcher : public OperandPredicateMatcher { public: - MBBOperandMatcher() : OperandPredicateMatcher(OPM_MBB) {} + MBBOperandMatcher(unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_MBB, InsnVarID, OpIdx) {} - static bool classof(const OperandPredicateMatcher *P) { + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_MBB; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckIsMBB, /*MI*/" << InsnVarID << ", /*Op*/" << OpIdx << ",\n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckIsMBB") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op") + << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak; } }; @@ -485,17 +1092,24 @@ protected: int64_t Value; public: - ConstantIntOperandMatcher(int64_t Value) - : OperandPredicateMatcher(OPM_Int), Value(Value) {} + ConstantIntOperandMatcher(int64_t Value, unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_Int, InsnVarID, OpIdx), Value(Value) {} - static bool classof(const OperandPredicateMatcher *P) { + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + Value == cast<ConstantIntOperandMatcher>(&B)->Value; + } + + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_Int; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckConstantInt, /*MI*/" << InsnVarID << ", /*Op*/" - << OpIdx << ", " << Value << ",\n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckConstantInt") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::IntValue(Value) << MatchTable::LineBreak; } }; @@ -506,17 +1120,25 @@ protected: int64_t Value; public: - LiteralIntOperandMatcher(int64_t Value) - : OperandPredicateMatcher(OPM_LiteralInt), Value(Value) {} + LiteralIntOperandMatcher(int64_t Value, unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_LiteralInt, InsnVarID, OpIdx), + Value(Value) {} - static bool classof(const OperandPredicateMatcher *P) { + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + Value == cast<LiteralIntOperandMatcher>(&B)->Value; + } + + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_LiteralInt; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckLiteralInt, /*MI*/" << InsnVarID << ", /*Op*/" - << OpIdx << ", " << Value << ",\n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckLiteralInt") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::IntValue(Value) << MatchTable::LineBreak; } }; @@ -526,17 +1148,26 @@ protected: const CodeGenIntrinsic *II; public: - IntrinsicIDOperandMatcher(const CodeGenIntrinsic *II) - : OperandPredicateMatcher(OPM_IntrinsicID), II(II) {} + 
IntrinsicIDOperandMatcher(const CodeGenIntrinsic *II, unsigned InsnVarID, + unsigned OpIdx) + : OperandPredicateMatcher(OPM_IntrinsicID, InsnVarID, OpIdx), II(II) {} - static bool classof(const OperandPredicateMatcher *P) { + bool isIdentical(const PredicateMatcher &B) const override { + return OperandPredicateMatcher::isIdentical(B) && + II == cast<IntrinsicIDOperandMatcher>(&B)->II; + } + + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_IntrinsicID; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID, unsigned OpIdx) const override { - OS << " GIM_CheckIntrinsicID, /*MI*/" << InsnVarID << ", /*Op*/" - << OpIdx << ", Intrinsic::" << II->EnumName << ",\n"; + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckIntrinsicID") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::NamedValue("Intrinsic::" + II->EnumName) + << MatchTable::LineBreak; } }; @@ -567,45 +1198,37 @@ public: SymbolicName = Name; } unsigned getOperandIndex() const { return OpIdx; } + unsigned getInsnVarID() const; std::string getOperandExpr(unsigned InsnVarID) const { return "State.MIs[" + llvm::to_string(InsnVarID) + "]->getOperand(" + llvm::to_string(OpIdx) + ")"; } - Optional<const OperandMatcher *> - getOptionalOperand(StringRef DesiredSymbolicName) const { - assert(!DesiredSymbolicName.empty() && "Cannot lookup unnamed operand"); - if (DesiredSymbolicName == SymbolicName) - return this; - for (const auto &OP : predicates()) { - const auto &MaybeOperand = OP->getOptionalOperand(DesiredSymbolicName); - if (MaybeOperand.hasValue()) - return MaybeOperand.getValue(); - } - return None; - } - InstructionMatcher &getInstructionMatcher() const { return Insn; } + Error addTypeCheckPredicate(const TypeSetByHwMode &VTy, + bool OperandIsAPointer); + /// Emit MatchTable opcodes to capture instructions into the MIs table. - void emitCaptureOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID) const { + void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const { for (const auto &Predicate : predicates()) - Predicate->emitCaptureOpcodes(OS, Rule, InsnVarID, OpIdx); + Predicate->emitCaptureOpcodes(Table, Rule); } /// Emit MatchTable opcodes that test whether the instruction named in /// InsnVarID matches all the predicates and all the operands. - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID) const { - OS << " // MIs[" << InsnVarID << "] "; + void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) const { + std::string Comment; + raw_string_ostream CommentOS(Comment); + CommentOS << "MIs[" << getInsnVarID() << "] "; if (SymbolicName.empty()) - OS << "Operand " << OpIdx; + CommentOS << "Operand " << OpIdx; else - OS << SymbolicName; - OS << "\n"; - emitPredicateListOpcodes(OS, Rule, InsnVarID, OpIdx); + CommentOS << SymbolicName; + Table << MatchTable::Comment(CommentOS.str()) << MatchTable::LineBreak; + + emitPredicateListOpcodes(Table, Rule); } /// Compare the priority of this object and B. 
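To give a sense of the new table format, this is roughly what the operand checks above produce once MatchTableRecord::emit() has formatted them; the instruction/operand indices and the enumerator names (GILLT_s32, MyTarget::GPR32RegClassID) are hypothetical placeholders:

    // MIs[0] Operand 1
    GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,
    GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::GPR32RegClassID,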
@@ -643,8 +1266,51 @@ public: unsigned getAllocatedTemporariesBaseID() const { return AllocatedTemporariesBaseID; } + + bool isSameAsAnotherOperand() const { + for (const auto &Predicate : predicates()) + if (isa<SameOperandMatcher>(Predicate)) + return true; + return false; + } }; +// Specialize OperandMatcher::addPredicate() to refrain from adding redundant +// predicates. +template <> +template <class Kind, class... Args> +Optional<Kind *> +PredicateListMatcher<OperandPredicateMatcher>::addPredicate(Args &&... args) { + auto *OpMatcher = static_cast<OperandMatcher *>(this); + if (static_cast<OperandMatcher *>(this)->isSameAsAnotherOperand()) + return None; + Predicates.emplace_back(llvm::make_unique<Kind>( + std::forward<Args>(args)..., OpMatcher->getInsnVarID(), + OpMatcher->getOperandIndex())); + return static_cast<Kind *>(Predicates.back().get()); +} + +Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy, + bool OperandIsAPointer) { + if (!VTy.isMachineValueType()) + return failedImport("unsupported typeset"); + + if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) { + addPredicate<PointerToAnyOperandMatcher>(0); + return Error::success(); + } + + auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy); + if (!OpTyOrNone) + return failedImport("unsupported type"); + + if (OperandIsAPointer) + addPredicate<PointerToAnyOperandMatcher>(OpTyOrNone->get().getSizeInBits()); + else + addPredicate<LLTOperandMatcher>(*OpTyOrNone); + return Error::success(); +} + unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const { return Operand.getAllocatedTemporariesBaseID(); } @@ -654,28 +1320,12 @@ unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const { /// Typical predicates include: /// * The opcode of the instruction is a particular value. /// * The nsw/nuw flag is/isn't set. -class InstructionPredicateMatcher { -protected: - /// This enum is used for RTTI and also defines the priority that is given to - /// the predicate when generating the matcher code. Kinds with higher priority - /// must be tested first. - enum PredicateKind { - IPM_Opcode, - }; - - PredicateKind Kind; - +class InstructionPredicateMatcher : public PredicateMatcher { public: - InstructionPredicateMatcher(PredicateKind Kind) : Kind(Kind) {} + InstructionPredicateMatcher(PredicateKind Kind, unsigned InsnVarID) + : PredicateMatcher(Kind, InsnVarID) {} virtual ~InstructionPredicateMatcher() {} - PredicateKind getKind() const { return Kind; } - - /// Emit MatchTable opcodes that test whether the instruction named in - /// InsnVarID matches the predicate. - virtual void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID) const = 0; - /// Compare the priority of this object and B. /// /// Returns true if this object is more important than B. @@ -689,23 +1339,36 @@ public: virtual unsigned countRendererFns() const { return 0; } }; +template <> +std::string +PredicateListMatcher<InstructionPredicateMatcher>::getNoPredicateComment() const { + return "No instruction predicates"; +} + /// Generates code to check the opcode of an instruction. 
class InstructionOpcodeMatcher : public InstructionPredicateMatcher { protected: const CodeGenInstruction *I; public: - InstructionOpcodeMatcher(const CodeGenInstruction *I) - : InstructionPredicateMatcher(IPM_Opcode), I(I) {} + InstructionOpcodeMatcher(unsigned InsnVarID, const CodeGenInstruction *I) + : InstructionPredicateMatcher(IPM_Opcode, InsnVarID), I(I) {} - static bool classof(const InstructionPredicateMatcher *P) { + static bool classof(const PredicateMatcher *P) { return P->getKind() == IPM_Opcode; } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID) const override { - OS << " GIM_CheckOpcode, /*MI*/" << InsnVarID << ", " << I->Namespace - << "::" << I->TheDef->getName() << ",\n"; + bool isIdentical(const PredicateMatcher &B) const override { + return InstructionPredicateMatcher::isIdentical(B) && + I == cast<InstructionOpcodeMatcher>(&B)->I; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckOpcode") << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) + << MatchTable::NamedValue(I->Namespace, I->TheDef->getName()) + << MatchTable::LineBreak; } /// Compare the priority of this object and B. @@ -727,6 +1390,107 @@ public: return false; }; + + bool isConstantInstruction() const { + return I->TheDef->getName() == "G_CONSTANT"; + } +}; + +/// Generates code to check that this instruction is a constant whose value +/// meets an immediate predicate. +/// +/// Immediates are slightly odd since they are typically used like an operand +/// but are represented as an operator internally. We typically write simm8:$src +/// in a tablegen pattern, but this is just syntactic sugar for +/// (imm:i32)<<P:Predicate_simm8>>:$imm which more directly describes the nodes +/// that will be matched and the predicate (which is attached to the imm +/// operator) that will be tested. In SelectionDAG this describes a +/// ConstantSDNode whose internal value will be tested using the simm8 predicate. +/// +/// The corresponding GlobalISel representation is %1 = G_CONSTANT iN Value. In +/// this representation, the immediate could be tested with an +/// InstructionMatcher, InstructionOpcodeMatcher, OperandMatcher, and a +/// OperandPredicateMatcher-subclass to check the Value meets the predicate but +/// there are two implementation issues with producing that matcher +/// configuration from the SelectionDAG pattern: +/// * ImmLeaf is a PatFrag whose root is an InstructionMatcher. This means that +/// were we to sink the immediate predicate to the operand we would have to +/// have two partial implementations of PatFrag support, one for immediates +/// and one for non-immediates. +/// * At the point we handle the predicate, the OperandMatcher hasn't been +/// created yet. If we were to sink the predicate to the OperandMatcher we +/// would also have to complicate (or duplicate) the code that descends and +/// creates matchers for the subtree. +/// Overall, it's simpler to handle it in the place it was found. 
+class InstructionImmPredicateMatcher : public InstructionPredicateMatcher { +protected: + TreePredicateFn Predicate; + +public: + InstructionImmPredicateMatcher(unsigned InsnVarID, + const TreePredicateFn &Predicate) + : InstructionPredicateMatcher(IPM_ImmPredicate, InsnVarID), + Predicate(Predicate) {} + + bool isIdentical(const PredicateMatcher &B) const override { + return InstructionPredicateMatcher::isIdentical(B) && + Predicate.getOrigPatFragRecord() == + cast<InstructionImmPredicateMatcher>(&B) + ->Predicate.getOrigPatFragRecord(); + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_ImmPredicate; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode(getMatchOpcodeForPredicate(Predicate)) + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Predicate") + << MatchTable::NamedValue(getEnumNameForPredicate(Predicate)) + << MatchTable::LineBreak; + } +}; + +/// Generates code to check that a memory instruction has a atomic ordering +/// MachineMemoryOperand. +class AtomicOrderingMMOPredicateMatcher : public InstructionPredicateMatcher { +public: + enum AOComparator { + AO_Exactly, + AO_OrStronger, + AO_WeakerThan, + }; + +protected: + StringRef Order; + AOComparator Comparator; + +public: + AtomicOrderingMMOPredicateMatcher(unsigned InsnVarID, StringRef Order, + AOComparator Comparator = AO_Exactly) + : InstructionPredicateMatcher(IPM_AtomicOrderingMMO, InsnVarID), + Order(Order), Comparator(Comparator) {} + + static bool classof(const InstructionPredicateMatcher *P) { + return P->getKind() == IPM_AtomicOrderingMMO; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + StringRef Opcode = "GIM_CheckAtomicOrdering"; + + if (Comparator == AO_OrStronger) + Opcode = "GIM_CheckAtomicOrderingOrStrongerThan"; + if (Comparator == AO_WeakerThan) + Opcode = "GIM_CheckAtomicOrderingWeakerThan"; + + Table << MatchTable::Opcode(Opcode) << MatchTable::Comment("MI") + << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Order") + << MatchTable::NamedValue(("(int64_t)AtomicOrdering::" + Order).str()) + << MatchTable::LineBreak; + } }; /// Generates code to check that a set of predicates and operands match for a @@ -740,16 +1504,35 @@ class InstructionMatcher protected: typedef std::vector<std::unique_ptr<OperandMatcher>> OperandVec; + RuleMatcher &Rule; + /// The operands to match. All rendered operands must be present even if the /// condition is always true. OperandVec Operands; + std::string SymbolicName; + unsigned InsnVarID; + public: + InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName) + : Rule(Rule), SymbolicName(SymbolicName) { + // We create a new instruction matcher. + // Get a new ID for that instruction. + InsnVarID = Rule.implicitlyDefineInsnVar(*this); + } + + RuleMatcher &getRuleMatcher() const { return Rule; } + + unsigned getVarID() const { return InsnVarID; } + /// Add an operand to the matcher. 
OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName, unsigned AllocatedTemporariesBaseID) { Operands.emplace_back(new OperandMatcher(*this, OpIdx, SymbolicName, AllocatedTemporariesBaseID)); + if (!SymbolicName.empty()) + Rule.defineOperand(SymbolicName, *Operands.back()); + return *Operands.back(); } @@ -763,24 +1546,7 @@ public: llvm_unreachable("Failed to lookup operand"); } - Optional<const OperandMatcher *> - getOptionalOperand(StringRef SymbolicName) const { - assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); - for (const auto &Operand : Operands) { - const auto &OM = Operand->getOptionalOperand(SymbolicName); - if (OM.hasValue()) - return OM.getValue(); - } - return None; - } - - const OperandMatcher &getOperand(StringRef SymbolicName) const { - Optional<const OperandMatcher *>OM = getOptionalOperand(SymbolicName); - if (OM.hasValue()) - return *OM.getValue(); - llvm_unreachable("Failed to lookup operand"); - } - + StringRef getSymbolicName() const { return SymbolicName; } unsigned getNumOperands() const { return Operands.size(); } OperandVec::iterator operands_begin() { return Operands.begin(); } OperandVec::iterator operands_end() { return Operands.end(); } @@ -792,24 +1558,27 @@ public: iterator_range<OperandVec::const_iterator> operands() const { return make_range(operands_begin(), operands_end()); } + bool operands_empty() const { return Operands.empty(); } + + void pop_front() { Operands.erase(Operands.begin()); } /// Emit MatchTable opcodes to check the shape of the match and capture /// instructions into the MIs table. - void emitCaptureOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnID) { - OS << " GIM_CheckNumOperands, /*MI*/" << InsnID << ", /*Expected*/" - << getNumOperands() << ",\n"; + void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) { + Table << MatchTable::Opcode("GIM_CheckNumOperands") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("Expected") + << MatchTable::IntValue(getNumOperands()) << MatchTable::LineBreak; for (const auto &Operand : Operands) - Operand->emitCaptureOpcodes(OS, Rule, InsnID); + Operand->emitCaptureOpcodes(Table, Rule); } /// Emit MatchTable opcodes that test whether the instruction named in /// InsnVarName matches all the predicates and all the operands. - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID) const { - emitPredicateListOpcodes(OS, Rule, InsnVarID); + void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) const { + emitPredicateListOpcodes(Table, Rule); for (const auto &Operand : Operands) - Operand->emitPredicateOpcodes(OS, Rule, InsnVarID); + Operand->emitPredicateOpcodes(Table, Rule); } /// Compare the priority of this object and B. @@ -854,8 +1623,27 @@ public: return A + Operand->countRendererFns(); }); } + + bool isConstantInstruction() const { + for (const auto &P : predicates()) + if (const InstructionOpcodeMatcher *Opcode = + dyn_cast<InstructionOpcodeMatcher>(P.get())) + return Opcode->isConstantInstruction(); + return false; + } }; +template <> +template <class Kind, class... Args> +Optional<Kind *> +PredicateListMatcher<InstructionPredicateMatcher>::addPredicate( + Args &&... 
args) { + InstructionMatcher *InstMatcher = static_cast<InstructionMatcher *>(this); + Predicates.emplace_back(llvm::make_unique<Kind>(InstMatcher->getVarID(), + std::forward<Args>(args)...)); + return static_cast<Kind *>(Predicates.back().get()); +} + /// Generates code to check that the operand is a register defined by an /// instruction that matches the given instruction matcher. /// @@ -870,33 +1658,29 @@ protected: std::unique_ptr<InstructionMatcher> InsnMatcher; public: - InstructionOperandMatcher() - : OperandPredicateMatcher(OPM_Instruction), - InsnMatcher(new InstructionMatcher()) {} + InstructionOperandMatcher(RuleMatcher &Rule, StringRef SymbolicName, + unsigned InsnVarID, unsigned OpIdx) + : OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx), + InsnMatcher(new InstructionMatcher(Rule, SymbolicName)) {} - static bool classof(const OperandPredicateMatcher *P) { + static bool classof(const PredicateMatcher *P) { return P->getKind() == OPM_Instruction; } InstructionMatcher &getInsnMatcher() const { return *InsnMatcher; } - Optional<const OperandMatcher *> - getOptionalOperand(StringRef SymbolicName) const override { - assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); - return InsnMatcher->getOptionalOperand(SymbolicName); - } - - void emitCaptureOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnID, unsigned OpIdx) const override { - unsigned InsnVarID = Rule.defineInsnVar(OS, *InsnMatcher, InsnID, OpIdx); - InsnMatcher->emitCaptureOpcodes(OS, Rule, InsnVarID); + void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + unsigned InsnID = + Rule.defineInsnVar(Table, *InsnMatcher, InsnVarID, getOpIdx()); + (void)InsnID; + assert(InsnMatcher->getVarID() == InsnID && + "Mismatch between build and emit"); + InsnMatcher->emitCaptureOpcodes(Table, Rule); } - void emitPredicateOpcodes(raw_ostream &OS, RuleMatcher &Rule, - unsigned InsnVarID_, - unsigned OpIdx_) const override { - unsigned InsnVarID = Rule.getInsnVarID(*InsnMatcher); - InsnMatcher->emitPredicateOpcodes(OS, Rule, InsnVarID); + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + InsnMatcher->emitPredicateOpcodes(Table, Rule); } }; @@ -905,9 +1689,13 @@ class OperandRenderer { public: enum RendererKind { OR_Copy, + OR_CopyOrAddZeroReg, OR_CopySubReg, + OR_CopyConstantAsImm, + OR_CopyFConstantAsFPImm, OR_Imm, OR_Register, + OR_TempRegister, OR_ComplexPattern }; @@ -920,7 +1708,8 @@ public: RendererKind getKind() const { return Kind; } - virtual void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const = 0; + virtual void emitRenderOpcodes(MatchTable &Table, + RuleMatcher &Rule) const = 0; }; /// A CopyRenderer emits code to copy a single operand from an existing @@ -928,18 +1717,15 @@ public: class CopyRenderer : public OperandRenderer { protected: unsigned NewInsnID; - /// The matcher for the instruction that this operand is copied from. - /// This provides the facility for looking up an a operand by it's name so - /// that it can be used as a source for the instruction being built. - const InstructionMatcher &Matched; /// The name of the operand. 
const StringRef SymbolicName; public: - CopyRenderer(unsigned NewInsnID, const InstructionMatcher &Matched, - StringRef SymbolicName) - : OperandRenderer(OR_Copy), NewInsnID(NewInsnID), Matched(Matched), - SymbolicName(SymbolicName) {} + CopyRenderer(unsigned NewInsnID, StringRef SymbolicName) + : OperandRenderer(OR_Copy), NewInsnID(NewInsnID), + SymbolicName(SymbolicName) { + assert(!SymbolicName.empty() && "Cannot copy from an unspecified source"); + } static bool classof(const OperandRenderer *R) { return R->getKind() == OR_Copy; @@ -947,12 +1733,117 @@ public: const StringRef getSymbolicName() const { return SymbolicName; } - void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const override { - const OperandMatcher &Operand = Matched.getOperand(SymbolicName); + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName); unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); - OS << " GIR_Copy, /*NewInsnID*/" << NewInsnID << ", /*OldInsnID*/" - << OldInsnVarID << ", /*OpIdx*/" << Operand.getOperandIndex() << ", // " - << SymbolicName << "\n"; + Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID") + << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx") + << MatchTable::IntValue(Operand.getOperandIndex()) + << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; + } +}; + +/// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an +/// existing instruction to the one being built. If the operand turns out to be +/// a 'G_CONSTANT 0' then it replaces the operand with a zero register. +class CopyOrAddZeroRegRenderer : public OperandRenderer { +protected: + unsigned NewInsnID; + /// The name of the operand. + const StringRef SymbolicName; + const Record *ZeroRegisterDef; + +public: + CopyOrAddZeroRegRenderer(unsigned NewInsnID, + StringRef SymbolicName, Record *ZeroRegisterDef) + : OperandRenderer(OR_CopyOrAddZeroReg), NewInsnID(NewInsnID), + SymbolicName(SymbolicName), ZeroRegisterDef(ZeroRegisterDef) { + assert(!SymbolicName.empty() && "Cannot copy from an unspecified source"); + } + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_CopyOrAddZeroReg; + } + + const StringRef getSymbolicName() const { return SymbolicName; } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName); + unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); + Table << MatchTable::Opcode("GIR_CopyOrAddZeroReg") + << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID) + << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx") + << MatchTable::IntValue(Operand.getOperandIndex()) + << MatchTable::NamedValue( + (ZeroRegisterDef->getValue("Namespace") + ? ZeroRegisterDef->getValueAsString("Namespace") + : ""), + ZeroRegisterDef->getName()) + << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; + } +}; + +/// A CopyConstantAsImmRenderer emits code to render a G_CONSTANT instruction to +/// an extended immediate operand. +class CopyConstantAsImmRenderer : public OperandRenderer { +protected: + unsigned NewInsnID; + /// The name of the operand. 
+ const std::string SymbolicName; + bool Signed; + +public: + CopyConstantAsImmRenderer(unsigned NewInsnID, StringRef SymbolicName) + : OperandRenderer(OR_CopyConstantAsImm), NewInsnID(NewInsnID), + SymbolicName(SymbolicName), Signed(true) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_CopyConstantAsImm; + } + + const StringRef getSymbolicName() const { return SymbolicName; } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName); + unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher); + Table << MatchTable::Opcode(Signed ? "GIR_CopyConstantAsSImm" + : "GIR_CopyConstantAsUImm") + << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID) + << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) + << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; + } +}; + +/// A CopyFConstantAsFPImmRenderer emits code to render a G_FCONSTANT +/// instruction to an extended immediate operand. +class CopyFConstantAsFPImmRenderer : public OperandRenderer { +protected: + unsigned NewInsnID; + /// The name of the operand. + const std::string SymbolicName; + +public: + CopyFConstantAsFPImmRenderer(unsigned NewInsnID, StringRef SymbolicName) + : OperandRenderer(OR_CopyFConstantAsFPImm), NewInsnID(NewInsnID), + SymbolicName(SymbolicName) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_CopyFConstantAsFPImm; + } + + const StringRef getSymbolicName() const { return SymbolicName; } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName); + unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher); + Table << MatchTable::Opcode("GIR_CopyFConstantAsFPImm") + << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID) + << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) + << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; } }; @@ -962,19 +1853,15 @@ public: class CopySubRegRenderer : public OperandRenderer { protected: unsigned NewInsnID; - /// The matcher for the instruction that this operand is copied from. - /// This provides the facility for looking up an a operand by it's name so - /// that it can be used as a source for the instruction being built. - const InstructionMatcher &Matched; /// The name of the operand. const StringRef SymbolicName; /// The subregister to extract. 
const CodeGenSubRegIndex *SubReg; public: - CopySubRegRenderer(unsigned NewInsnID, const InstructionMatcher &Matched, - StringRef SymbolicName, const CodeGenSubRegIndex *SubReg) - : OperandRenderer(OR_CopySubReg), NewInsnID(NewInsnID), Matched(Matched), + CopySubRegRenderer(unsigned NewInsnID, StringRef SymbolicName, + const CodeGenSubRegIndex *SubReg) + : OperandRenderer(OR_CopySubReg), NewInsnID(NewInsnID), SymbolicName(SymbolicName), SubReg(SubReg) {} static bool classof(const OperandRenderer *R) { @@ -983,13 +1870,17 @@ public: const StringRef getSymbolicName() const { return SymbolicName; } - void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const override { - const OperandMatcher &Operand = Matched.getOperand(SymbolicName); + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName); unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher()); - OS << " GIR_CopySubReg, /*NewInsnID*/" << NewInsnID - << ", /*OldInsnID*/" << OldInsnVarID << ", /*OpIdx*/" - << Operand.getOperandIndex() << ", /*SubRegIdx*/" << SubReg->EnumValue - << ", // " << SymbolicName << "\n"; + Table << MatchTable::Opcode("GIR_CopySubReg") + << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID) + << MatchTable::Comment("OldInsnID") + << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx") + << MatchTable::IntValue(Operand.getOperandIndex()) + << MatchTable::Comment("SubRegIdx") + << MatchTable::IntValue(SubReg->EnumValue) + << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; } }; @@ -1009,12 +1900,46 @@ public: return R->getKind() == OR_Register; } - void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const override { - OS << " GIR_AddRegister, /*InsnID*/" << InsnID << ", " - << (RegisterDef->getValue("Namespace") - ? RegisterDef->getValueAsString("Namespace") - : "") - << "::" << RegisterDef->getName() << ",\n"; + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddRegister") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::NamedValue( + (RegisterDef->getValue("Namespace") + ? RegisterDef->getValueAsString("Namespace") + : ""), + RegisterDef->getName()) + << MatchTable::LineBreak; + } +}; + +/// Adds a specific temporary virtual register to the instruction being built. +/// This is used to chain instructions together when emitting multiple +/// instructions. 
+class TempRegRenderer : public OperandRenderer { +protected: + unsigned InsnID; + unsigned TempRegID; + bool IsDef; + +public: + TempRegRenderer(unsigned InsnID, unsigned TempRegID, bool IsDef = false) + : OperandRenderer(OR_Register), InsnID(InsnID), TempRegID(TempRegID), + IsDef(IsDef) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_TempRegister; + } + + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddTempRegister") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID) + << MatchTable::Comment("TempRegFlags"); + if (IsDef) + Table << MatchTable::NamedValue("RegState::Define"); + else + Table << MatchTable::IntValue(0); + Table << MatchTable::LineBreak; } }; @@ -1032,9 +1957,10 @@ public: return R->getKind() == OR_Imm; } - void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const override { - OS << " GIR_AddImm, /*InsnID*/" << InsnID << ", /*Imm*/" << Imm - << ",\n"; + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID") + << MatchTable::IntValue(InsnID) << MatchTable::Comment("Imm") + << MatchTable::IntValue(Imm) << MatchTable::LineBreak; } }; @@ -1049,6 +1975,9 @@ private: /// The renderer number. This must be unique within a rule since it's used to /// identify a temporary variable to hold the renderer function. unsigned RendererID; + /// When provided, this is the suboperand of the ComplexPattern operand to + /// render. Otherwise all the suboperands will be rendered. + Optional<unsigned> SubOperand; unsigned getNumOperands() const { return TheDef.getValueAsDag("Operands")->getNumArgs(); @@ -1056,17 +1985,26 @@ private: public: RenderComplexPatternOperand(unsigned InsnID, const Record &TheDef, - StringRef SymbolicName, unsigned RendererID) + StringRef SymbolicName, unsigned RendererID, + Optional<unsigned> SubOperand = None) : OperandRenderer(OR_ComplexPattern), InsnID(InsnID), TheDef(TheDef), - SymbolicName(SymbolicName), RendererID(RendererID) {} + SymbolicName(SymbolicName), RendererID(RendererID), + SubOperand(SubOperand) {} static bool classof(const OperandRenderer *R) { return R->getKind() == OR_ComplexPattern; } - void emitRenderOpcodes(raw_ostream &OS, RuleMatcher &Rule) const override { - OS << " GIR_ComplexRenderer, /*InsnID*/" << InsnID << ", /*RendererID*/" - << RendererID << ",\n"; + void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode(SubOperand.hasValue() ? "GIR_ComplexSubOperandRenderer" + : "GIR_ComplexRenderer") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("RendererID") + << MatchTable::IntValue(RendererID); + if (SubOperand.hasValue()) + Table << MatchTable::Comment("SubOperand") + << MatchTable::IntValue(SubOperand.getValue()); + Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak; } }; @@ -1079,27 +2017,21 @@ class MatchAction { public: virtual ~MatchAction() {} - /// Emit the C++ statements to implement the action. - /// - /// \param RecycleInsnID If given, it's an instruction to recycle. The - /// requirements on the instruction vary from action to - /// action. - virtual void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, - unsigned RecycleInsnID) const = 0; + /// Emit the MatchTable opcodes to implement the action. 
+ virtual void emitActionOpcodes(MatchTable &Table, + RuleMatcher &Rule) const = 0; }; /// Generates a comment describing the matched rule being acted upon. class DebugCommentAction : public MatchAction { private: - const PatternToMatch &P; + std::string S; public: - DebugCommentAction(const PatternToMatch &P) : P(P) {} + DebugCommentAction(StringRef S) : S(S) {} - void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, - unsigned RecycleInsnID) const override { - OS << " // " << *P.getSrcPattern() << " => " << *P.getDstPattern() - << "\n"; + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Comment(S) << MatchTable::LineBreak; } }; @@ -1109,18 +2041,21 @@ class BuildMIAction : public MatchAction { private: unsigned InsnID; const CodeGenInstruction *I; - const InstructionMatcher &Matched; + const InstructionMatcher *Matched; std::vector<std::unique_ptr<OperandRenderer>> OperandRenderers; /// True if the instruction can be built solely by mutating the opcode. - bool canMutate() const { - if (OperandRenderers.size() != Matched.getNumOperands()) + bool canMutate(RuleMatcher &Rule, const InstructionMatcher *Insn) const { + if (!Insn) + return false; + + if (OperandRenderers.size() != Insn->getNumOperands()) return false; for (const auto &Renderer : enumerate(OperandRenderers)) { if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) { - const OperandMatcher &OM = Matched.getOperand(Copy->getSymbolicName()); - if (&Matched != &OM.getInstructionMatcher() || + const OperandMatcher &OM = Rule.getOperandMatcher(Copy->getSymbolicName()); + if (Insn != &OM.getInstructionMatcher() || OM.getOperandIndex() != Renderer.index()) return false; } else @@ -1131,38 +2066,61 @@ private: } public: - BuildMIAction(unsigned InsnID, const CodeGenInstruction *I, - const InstructionMatcher &Matched) - : InsnID(InsnID), I(I), Matched(Matched) {} + BuildMIAction(unsigned InsnID, const CodeGenInstruction *I) + : InsnID(InsnID), I(I), Matched(nullptr) {} + + const CodeGenInstruction *getCGI() const { return I; } + + void chooseInsnToMutate(RuleMatcher &Rule) { + for (const auto *MutateCandidate : Rule.mutatable_insns()) { + if (canMutate(Rule, MutateCandidate)) { + // Take the first one we're offered that we're able to mutate. + Rule.reserveInsnMatcherForMutation(MutateCandidate); + Matched = MutateCandidate; + return; + } + } + } template <class Kind, class... Args> Kind &addRenderer(Args&&... 
args) { OperandRenderers.emplace_back( - llvm::make_unique<Kind>(std::forward<Args>(args)...)); + llvm::make_unique<Kind>(InsnID, std::forward<Args>(args)...)); return *static_cast<Kind *>(OperandRenderers.back().get()); } - void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, - unsigned RecycleInsnID) const override { - if (canMutate()) { - OS << " GIR_MutateOpcode, /*InsnID*/" << InsnID - << ", /*RecycleInsnID*/ " << RecycleInsnID << ", /*Opcode*/" - << I->Namespace << "::" << I->TheDef->getName() << ",\n"; + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + if (Matched) { + assert(canMutate(Rule, Matched) && + "Arranged to mutate an insn that isn't mutatable"); + + unsigned RecycleInsnID = Rule.getInsnVarID(*Matched); + Table << MatchTable::Opcode("GIR_MutateOpcode") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("RecycleInsnID") + << MatchTable::IntValue(RecycleInsnID) + << MatchTable::Comment("Opcode") + << MatchTable::NamedValue(I->Namespace, I->TheDef->getName()) + << MatchTable::LineBreak; if (!I->ImplicitDefs.empty() || !I->ImplicitUses.empty()) { for (auto Def : I->ImplicitDefs) { auto Namespace = Def->getValue("Namespace") ? Def->getValueAsString("Namespace") : ""; - OS << " GIR_AddImplicitDef, " << InsnID << ", " << Namespace - << "::" << Def->getName() << ",\n"; + Table << MatchTable::Opcode("GIR_AddImplicitDef") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::NamedValue(Namespace, Def->getName()) + << MatchTable::LineBreak; } for (auto Use : I->ImplicitUses) { auto Namespace = Use->getValue("Namespace") ? Use->getValueAsString("Namespace") : ""; - OS << " GIR_AddImplicitUse, " << InsnID << ", " << Namespace - << "::" << Use->getName() << ",\n"; + Table << MatchTable::Opcode("GIR_AddImplicitUse") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::NamedValue(Namespace, Use->getName()) + << MatchTable::LineBreak; } } return; @@ -1171,13 +2129,41 @@ public: // TODO: Simple permutation looks like it could be almost as common as // mutation due to commutative operations. - OS << " GIR_BuildMI, /*InsnID*/" << InsnID << ", /*Opcode*/" - << I->Namespace << "::" << I->TheDef->getName() << ",\n"; + Table << MatchTable::Opcode("GIR_BuildMI") << MatchTable::Comment("InsnID") + << MatchTable::IntValue(InsnID) << MatchTable::Comment("Opcode") + << MatchTable::NamedValue(I->Namespace, I->TheDef->getName()) + << MatchTable::LineBreak; for (const auto &Renderer : OperandRenderers) - Renderer->emitRenderOpcodes(OS, Rule); + Renderer->emitRenderOpcodes(Table, Rule); + + if (I->mayLoad || I->mayStore) { + Table << MatchTable::Opcode("GIR_MergeMemOperands") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("MergeInsnID's"); + // Emit the ID's for all the instructions that are matched by this rule. + // TODO: Limit this to matched instructions that mayLoad/mayStore or have + // some other means of having a memoperand. Also limit this to + // emitted instructions that expect to have a memoperand too. For + // example, (G_SEXT (G_LOAD x)) that results in separate load and + // sign-extend instructions shouldn't put the memoperand on the + // sign-extend since it has no effect there. 
+ std::vector<unsigned> MergeInsnIDs; + for (const auto &IDMatcherPair : Rule.defined_insn_vars()) + MergeInsnIDs.push_back(IDMatcherPair.second); + std::sort(MergeInsnIDs.begin(), MergeInsnIDs.end()); + for (const auto &MergeInsnID : MergeInsnIDs) + Table << MatchTable::IntValue(MergeInsnID); + Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList") + << MatchTable::LineBreak; + } - OS << " GIR_MergeMemOperands, /*InsnID*/" << InsnID << ",\n" - << " GIR_EraseFromParent, /*InsnID*/" << RecycleInsnID << ",\n"; + // FIXME: This is a hack but it's sufficient for ISel. We'll need to do + // better for combines. Particularly when there are multiple match + // roots. + if (InsnID == 0) + Table << MatchTable::Opcode("GIR_EraseFromParent") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::LineBreak; } }; @@ -1189,9 +2175,10 @@ class ConstrainOperandsToDefinitionAction : public MatchAction { public: ConstrainOperandsToDefinitionAction(unsigned InsnID) : InsnID(InsnID) {} - void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, - unsigned RecycleInsnID) const override { - OS << " GIR_ConstrainSelectedInstOperands, /*InsnID*/" << InsnID << ",\n"; + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_ConstrainSelectedInstOperands") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::LineBreak; } }; @@ -1207,15 +2194,38 @@ public: const CodeGenRegisterClass &RC) : InsnID(InsnID), OpIdx(OpIdx), RC(RC) {} - void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, - unsigned RecycleInsnID) const override { - OS << " GIR_ConstrainOperandRC, /*InsnID*/" << InsnID << ", /*Op*/" - << OpIdx << ", /*RC " << RC.getName() << "*/ " << RC.EnumValue << ",\n"; + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_ConstrainOperandRC") + << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("RC " + RC.getName()) + << MatchTable::IntValue(RC.EnumValue) << MatchTable::LineBreak; } }; -InstructionMatcher &RuleMatcher::addInstructionMatcher() { - Matchers.emplace_back(new InstructionMatcher()); +/// Generates code to create a temporary register which can be used to chain +/// instructions together. +class MakeTempRegisterAction : public MatchAction { +private: + LLTCodeGen Ty; + unsigned TempRegID; + +public: + MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) + : Ty(Ty), TempRegID(TempRegID) {} + + void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIR_MakeTempReg") + << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID) + << MatchTable::Comment("TypeID") + << MatchTable::NamedValue(Ty.getCxxEnumValue()) + << MatchTable::LineBreak; + } +}; + +InstructionMatcher &RuleMatcher::addInstructionMatcher(StringRef SymbolicName) { + Matchers.emplace_back(new InstructionMatcher(*this, SymbolicName)); + MutatableInsns.insert(Matchers.back().get()); return *Matchers.back(); } @@ -1227,12 +2237,33 @@ const std::vector<Record *> &RuleMatcher::getRequiredFeatures() const { return RequiredFeatures; } +// Emplaces an action of the specified Kind at the end of the action list. +// +// Returns a reference to the newly created action. +// +// Like std::vector::emplace_back(), may invalidate all iterators if the new +// size exceeds the capacity. 
Otherwise, only invalidates the past-the-end +// iterator. template <class Kind, class... Args> Kind &RuleMatcher::addAction(Args &&... args) { Actions.emplace_back(llvm::make_unique<Kind>(std::forward<Args>(args)...)); return *static_cast<Kind *>(Actions.back().get()); } +// Emplaces an action of the specified Kind before the given insertion point. +// +// Returns an iterator pointing at the newly created instruction. +// +// Like std::vector::insert(), may invalidate all iterators if the new size +// exceeds the capacity. Otherwise, only invalidates the iterators from the +// insertion point onwards. +template <class Kind, class... Args> +action_iterator RuleMatcher::insertAction(action_iterator InsertPt, + Args &&... args) { + return Actions.emplace(InsertPt, + llvm::make_unique<Kind>(std::forward<Args>(args)...)); +} + unsigned RuleMatcher::implicitlyDefineInsnVar(const InstructionMatcher &Matcher) { unsigned NewInsnVarID = NextInsnVarID++; @@ -1240,13 +2271,16 @@ RuleMatcher::implicitlyDefineInsnVar(const InstructionMatcher &Matcher) { return NewInsnVarID; } -unsigned RuleMatcher::defineInsnVar(raw_ostream &OS, +unsigned RuleMatcher::defineInsnVar(MatchTable &Table, const InstructionMatcher &Matcher, unsigned InsnID, unsigned OpIdx) { unsigned NewInsnVarID = implicitlyDefineInsnVar(Matcher); - OS << " GIM_RecordInsn, /*DefineMI*/" << NewInsnVarID << ", /*MI*/" - << InsnID << ", /*OpIdx*/" << OpIdx << ", // MIs[" << NewInsnVarID - << "]\n"; + Table << MatchTable::Opcode("GIM_RecordInsn") + << MatchTable::Comment("DefineMI") << MatchTable::IntValue(NewInsnVarID) + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnID) + << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("MIs[" + llvm::to_string(NewInsnVarID) + "]") + << MatchTable::LineBreak; return NewInsnVarID; } @@ -1257,15 +2291,48 @@ unsigned RuleMatcher::getInsnVarID(const InstructionMatcher &InsnMatcher) const llvm_unreachable("Matched Insn was not captured in a local variable"); } +void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) { + if (DefinedOperands.find(SymbolicName) == DefinedOperands.end()) { + DefinedOperands[SymbolicName] = &OM; + return; + } + + // If the operand is already defined, then we must ensure both references in + // the matcher have the exact same node. + OM.addPredicate<SameOperandMatcher>(OM.getSymbolicName()); +} + +const InstructionMatcher & +RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const { + for (const auto &I : InsnVariableIDs) + if (I.first->getSymbolicName() == SymbolicName) + return *I.first; + llvm_unreachable( + ("Failed to lookup instruction " + SymbolicName).str().c_str()); +} + +const OperandMatcher & +RuleMatcher::getOperandMatcher(StringRef Name) const { + const auto &I = DefinedOperands.find(Name); + + if (I == DefinedOperands.end()) + PrintFatalError(SrcLoc, "Operand " + Name + " was not declared in matcher"); + + return *I->second; +} + /// Emit MatchTable opcodes to check the shape of the match and capture /// instructions into local variables. 
-void RuleMatcher::emitCaptureOpcodes(raw_ostream &OS) {
+void RuleMatcher::emitCaptureOpcodes(MatchTable &Table) {
 assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
 unsigned InsnVarID = implicitlyDefineInsnVar(*Matchers.front());
- Matchers.front()->emitCaptureOpcodes(OS, *this, InsnVarID);
+ (void)InsnVarID;
+ assert(Matchers.front()->getVarID() == InsnVarID &&
+ "IDs differ between build and emit");
+ Matchers.front()->emitCaptureOpcodes(Table, *this);
 }
-void RuleMatcher::emit(raw_ostream &OS) {
+void RuleMatcher::emit(MatchTable &Table) {
 if (Matchers.empty())
 llvm_unreachable("Unexpected empty matcher!");
@@ -1280,16 +2347,20 @@ void RuleMatcher::emit(raw_ostream &OS) {
 // on some targets but we don't need to make use of that yet.
 assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
- OS << " const static int64_t MatchTable" << CurrentMatchTableID << "[] = {\n";
+ unsigned LabelID = Table.allocateLabelID();
+ Table << MatchTable::Opcode("GIM_Try", +1)
+ << MatchTable::Comment("On fail goto") << MatchTable::JumpTarget(LabelID)
+ << MatchTable::LineBreak;
+
 if (!RequiredFeatures.empty()) {
- OS << " GIM_CheckFeatures, " << getNameForFeatureBitset(RequiredFeatures)
- << ",\n";
+ Table << MatchTable::Opcode("GIM_CheckFeatures")
+ << MatchTable::NamedValue(getNameForFeatureBitset(RequiredFeatures))
+ << MatchTable::LineBreak;
 }
- emitCaptureOpcodes(OS);
+ emitCaptureOpcodes(Table);
- Matchers.front()->emitPredicateOpcodes(OS, *this,
- getInsnVarID(*Matchers.front()));
+ Matchers.front()->emitPredicateOpcodes(Table, *this);
 // We must also check if it's safe to fold the matched instructions.
 if (InsnVariableIDs.size() >= 2) {
@@ -1307,7 +2378,9 @@ void RuleMatcher::emit(raw_ostream &OS) {
 for (const auto &InsnID : InsnIDs) {
 // Reject the difficult cases until we have a more accurate check.
- OS << " GIM_CheckIsSafeToFold, /*InsnID*/" << InsnID << ",\n";
+ Table << MatchTable::Opcode("GIM_CheckIsSafeToFold")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::LineBreak;
 // FIXME: Emit checks to determine it's _actually_ safe to fold and/or
 // account for unsafe cases.
@@ -1347,16 +2420,14 @@ void RuleMatcher::emit(raw_ostream &OS) {
 }
 for (const auto &MA : Actions)
- MA->emitCxxActionStmts(OS, *this, 0);
- OS << " GIR_Done,\n"
- << " };\n"
- << " State.MIs.resize(1);\n"
- << " DEBUG(dbgs() << \"Processing MatchTable" << CurrentMatchTableID
- << "\\n\");\n"
- << " if (executeMatchTable(*this, OutMIs, State, MatcherInfo, MatchTable"
- << CurrentMatchTableID << ", TII, MRI, TRI, RBI, AvailableFeatures)) {\n"
- << " return true;\n"
- << " }\n\n";
+ MA->emitActionOpcodes(Table, *this);
+
+ if (GenerateCoverage)
+ Table << MatchTable::Opcode("GIR_Coverage") << MatchTable::IntValue(RuleID)
+ << MatchTable::LineBreak;
+
+ Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
+ << MatchTable::Label(LabelID);
 }
 bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
@@ -1384,6 +2455,54 @@ unsigned RuleMatcher::countRendererFns() const {
 });
 }
+bool OperandPredicateMatcher::isHigherPriorityThan(
+ const OperandPredicateMatcher &B) const {
+ // Generally speaking, an instruction is more important than an Int or a
+ // LiteralInt because it can cover more nodes but there's an exception to
+ // this. G_CONSTANTs are less important than either of those two because they
+ // are more permissive.
+ + const InstructionOperandMatcher *AOM = + dyn_cast<InstructionOperandMatcher>(this); + const InstructionOperandMatcher *BOM = + dyn_cast<InstructionOperandMatcher>(&B); + bool AIsConstantInsn = AOM && AOM->getInsnMatcher().isConstantInstruction(); + bool BIsConstantInsn = BOM && BOM->getInsnMatcher().isConstantInstruction(); + + if (AOM && BOM) { + // The relative priorities between a G_CONSTANT and any other instruction + // don't actually matter but this code is needed to ensure a strict weak + // ordering. This is particularly important on Windows where the rules will + // be incorrectly sorted without it. + if (AIsConstantInsn != BIsConstantInsn) + return AIsConstantInsn < BIsConstantInsn; + return false; + } + + if (AOM && AIsConstantInsn && (B.Kind == OPM_Int || B.Kind == OPM_LiteralInt)) + return false; + if (BOM && BIsConstantInsn && (Kind == OPM_Int || Kind == OPM_LiteralInt)) + return true; + + return Kind < B.Kind; +} + +void SameOperandMatcher::emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const { + const OperandMatcher &OtherOM = Rule.getOperandMatcher(MatchingName); + unsigned OtherInsnVarID = Rule.getInsnVarID(OtherOM.getInstructionMatcher()); + assert(OtherInsnVarID == OtherOM.getInstructionMatcher().getVarID()); + + Table << MatchTable::Opcode("GIM_CheckIsSameOperand") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx) + << MatchTable::Comment("OtherMI") + << MatchTable::IntValue(OtherInsnVarID) + << MatchTable::Comment("OtherOpIdx") + << MatchTable::IntValue(OtherOM.getOperandIndex()) + << MatchTable::LineBreak; +} + //===- GlobalISelEmitter class --------------------------------------------===// class GlobalISelEmitter { @@ -1397,9 +2516,11 @@ private: const CodeGenTarget &Target; CodeGenRegBank CGRegs; - /// Keep track of the equivalence between SDNodes and Instruction. + /// Keep track of the equivalence between SDNodes and Instruction by mapping + /// SDNodes to the GINodeEquiv mapping. We need to map to the GINodeEquiv to + /// check for attributes on the relation such as CheckMMOIsNonAtomic. /// This is defined using 'GINodeEquiv' in the target description. - DenseMap<Record *, const CodeGenInstruction *> NodeEquivs; + DenseMap<Record *, Record *> NodeEquivs; /// Keep track of the equivalence between ComplexPattern's and /// GIComplexOperandMatcher. Map entries are specified by subclassing @@ -1409,41 +2530,99 @@ private: // Map of predicates to their subtarget features. SubtargetFeatureInfoMap SubtargetFeatures; + // Rule coverage information. 
+ Optional<CodeGenCoverage> RuleCoverage;
+
 void gatherNodeEquivs();
- const CodeGenInstruction *findNodeEquiv(Record *N) const;
-
- Error importRulePredicates(RuleMatcher &M, ArrayRef<Init *> Predicates);
- Expected<InstructionMatcher &>
- createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher,
- const TreePatternNode *Src,
- unsigned &TempOpIdx) const;
- Error importChildMatcher(InstructionMatcher &InsnMatcher,
- const TreePatternNode *SrcChild, unsigned OpIdx,
+ Record *findNodeEquiv(Record *N) const;
+
+ Error importRulePredicates(RuleMatcher &M, ArrayRef<Predicate> Predicates);
+ Expected<InstructionMatcher &> createAndImportSelDAGMatcher(
+ RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ const TreePatternNode *Src, unsigned &TempOpIdx) const;
+ Error importComplexPatternOperandMatcher(OperandMatcher &OM, Record *R,
+ unsigned &TempOpIdx) const;
+ Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
+ const TreePatternNode *SrcChild,
+ bool OperandIsAPointer, unsigned OpIdx,
 unsigned &TempOpIdx) const;
+
 Expected<BuildMIAction &>
- createAndImportInstructionRenderer(RuleMatcher &M, const TreePatternNode *Dst,
- const InstructionMatcher &InsnMatcher);
- Error importExplicitUseRenderer(BuildMIAction &DstMIBuilder,
- TreePatternNode *DstChild,
- const InstructionMatcher &InsnMatcher) const;
+ createAndImportInstructionRenderer(RuleMatcher &M,
+ const TreePatternNode *Dst);
+ Expected<action_iterator> createAndImportSubInstructionRenderer(
+ action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
+ unsigned TempReg);
+ Expected<action_iterator>
+ createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
+ const TreePatternNode *Dst);
+ void importExplicitDefRenderers(BuildMIAction &DstMIBuilder);
+ Expected<action_iterator>
+ importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M,
+ BuildMIAction &DstMIBuilder,
+ const llvm::TreePatternNode *Dst);
+ Expected<action_iterator>
+ importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
+ BuildMIAction &DstMIBuilder,
+ TreePatternNode *DstChild);
 Error importDefaultOperandRenderers(BuildMIAction &DstMIBuilder,
 DagInit *DefaultOps) const;
 Error importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
 const std::vector<Record *> &ImplicitDefs) const;
+ void emitImmPredicates(raw_ostream &OS, StringRef TypeIdentifier,
+ StringRef Type,
+ std::function<bool(const Record *R)> Filter);
+
 /// Analyze pattern \p P, returning a matcher for it if possible.
 /// Otherwise, return an Error explaining why we don't support it.
 Expected<RuleMatcher> runOnPattern(const PatternToMatch &P);
 void declareSubtargetFeature(Record *Predicate);
+
+ TreePatternNode *fixupPatternNode(TreePatternNode *N);
+ void fixupPatternTrees(TreePattern *P);
+
+ /// Takes a sequence of \p Rules and groups them based on the predicates
+ /// they share. \p StorageGroupMatcher is used as a memory container
+ /// for the groups that are created as part of this process.
+ /// The optimization process does not change the relative order of
+ /// the rules. In particular, we don't try to share predicates if
+ /// that means reordering the rules (e.g., we won't group R1 and R3
+ /// in the following example as it would imply reordering R2 and R3
+ /// => R1 p1, R2 p2, R3 p1).
+ ///
+ /// What this optimization does looks like this:
+ /// Output without optimization:
+ /// \verbatim
+ /// # R1
+ /// # predicate A
+ /// # predicate B
+ /// ...
+ /// # R2 + /// # predicate A // <-- effectively this is going to be checked twice. + /// // Once in R1 and once in R2. + /// # predicate C + /// \endverbatim + /// Output with optimization: + /// \verbatim + /// # Group1_2 + /// # predicate A // <-- Check is now shared. + /// # R1 + /// # predicate B + /// # R2 + /// # predicate C + /// \endverbatim + std::vector<Matcher *> optimizeRules( + std::vector<RuleMatcher> &Rules, + std::vector<std::unique_ptr<GroupMatcher>> &StorageGroupMatcher); }; void GlobalISelEmitter::gatherNodeEquivs() { assert(NodeEquivs.empty()); for (Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv")) - NodeEquivs[Equiv->getValueAsDef("Node")] = - &Target.getInstruction(Equiv->getValueAsDef("I")); + NodeEquivs[Equiv->getValueAsDef("Node")] = Equiv; assert(ComplexPatternEquivs.empty()); for (Record *Equiv : RK.getAllDerivedDefinitions("GIComplexPatternEquiv")) { @@ -1454,31 +2633,33 @@ void GlobalISelEmitter::gatherNodeEquivs() { } } -const CodeGenInstruction *GlobalISelEmitter::findNodeEquiv(Record *N) const { +Record *GlobalISelEmitter::findNodeEquiv(Record *N) const { return NodeEquivs.lookup(N); } GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK) - : RK(RK), CGP(RK), Target(CGP.getTargetInfo()), CGRegs(RK) {} + : RK(RK), CGP(RK, [&](TreePattern *P) { fixupPatternTrees(P); }), + Target(CGP.getTargetInfo()), CGRegs(RK, Target.getHwModes()) {} //===- Emitter ------------------------------------------------------------===// Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M, - ArrayRef<Init *> Predicates) { - for (const Init *Predicate : Predicates) { - const DefInit *PredicateDef = static_cast<const DefInit *>(Predicate); - declareSubtargetFeature(PredicateDef->getDef()); - M.addRequiredFeature(PredicateDef->getDef()); + ArrayRef<Predicate> Predicates) { + for (const Predicate &P : Predicates) { + if (!P.Def) + continue; + declareSubtargetFeature(P.Def); + M.addRequiredFeature(P.Def); } return Error::success(); } -Expected<InstructionMatcher &> -GlobalISelEmitter::createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, - const TreePatternNode *Src, - unsigned &TempOpIdx) const { +Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher( + RuleMatcher &Rule, InstructionMatcher &InsnMatcher, + const TreePatternNode *Src, unsigned &TempOpIdx) const { + Record *SrcGIEquivOrNull = nullptr; const CodeGenInstruction *SrcGIOrNull = nullptr; // Start with the defined operands (i.e., the results of the root operator). 
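// An illustrative sketch (not part of the patch itself) of how the reworked
// NodeEquivs map above is meant to be consumed: keeping the GINodeEquiv record
// rather than the CodeGenInstruction lets the importer query per-relation
// flags such as CheckMMOIsNonAtomic alongside the equivalent instruction. All
// identifiers below are the ones declared in this file.
//
//   if (Record *Equiv = findNodeEquiv(Src->getOperator())) {
//     const CodeGenInstruction &GI =
//         Target.getInstruction(Equiv->getValueAsDef("I"));
//     InsnMatcher.addPredicate<InstructionOpcodeMatcher>(&GI);
//     if (Equiv->getValueAsBit("CheckMMOIsNonAtomic"))
//       InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
//   }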
@@ -1494,34 +2675,122 @@ GlobalISelEmitter::createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, return failedImport( "Unable to deduce gMIR opcode to handle Src (which is a leaf)"); } else { - SrcGIOrNull = findNodeEquiv(Src->getOperator()); - if (!SrcGIOrNull) + SrcGIEquivOrNull = findNodeEquiv(Src->getOperator()); + if (!SrcGIEquivOrNull) return failedImport("Pattern operator lacks an equivalent Instruction" + explainOperator(Src->getOperator())); - auto &SrcGI = *SrcGIOrNull; + SrcGIOrNull = &Target.getInstruction(SrcGIEquivOrNull->getValueAsDef("I")); // The operators look good: match the opcode - InsnMatcher.addPredicate<InstructionOpcodeMatcher>(&SrcGI); + InsnMatcher.addPredicate<InstructionOpcodeMatcher>(SrcGIOrNull); } unsigned OpIdx = 0; - for (const EEVT::TypeSet &Ty : Src->getExtTypes()) { - auto OpTyOrNone = MVTToLLT(Ty.getConcrete()); - - if (!OpTyOrNone) - return failedImport( - "Result of Src pattern operator has an unsupported type"); - + for (const TypeSetByHwMode &VTy : Src->getExtTypes()) { // Results don't have a name unless they are the root node. The caller will // set the name if appropriate. OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); - OM.addPredicate<LLTOperandMatcher>(*OpTyOrNone); + if (auto Error = OM.addTypeCheckPredicate(VTy, false /* OperandIsAPointer */)) + return failedImport(toString(std::move(Error)) + + " for result of Src pattern operator"); + } + + for (const auto &Predicate : Src->getPredicateFns()) { + if (Predicate.isAlwaysTrue()) + continue; + + if (Predicate.isImmediatePattern()) { + InsnMatcher.addPredicate<InstructionImmPredicateMatcher>(Predicate); + continue; + } + + // No check required. G_LOAD by itself is a non-extending load. + if (Predicate.isNonExtLoad()) + continue; + + // No check required. G_STORE by itself is a non-extending store. + if (Predicate.isNonTruncStore()) + continue; + + if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) { + if (Predicate.getMemoryVT() != nullptr) { + Optional<LLTCodeGen> MemTyOrNone = + MVTToLLT(getValueType(Predicate.getMemoryVT())); + + if (!MemTyOrNone) + return failedImport("MemVT could not be converted to LLT"); + + OperandMatcher &OM = InsnMatcher.getOperand(0); + OM.addPredicate<LLTOperandMatcher>(MemTyOrNone.getValue()); + continue; + } + } + + if (Predicate.isLoad() || Predicate.isStore()) { + // No check required. A G_LOAD/G_STORE is an unindexed load. 
+ if (Predicate.isUnindexed()) + continue; + } + + if (Predicate.isAtomic()) { + if (Predicate.isAtomicOrderingMonotonic()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "Monotonic"); + continue; + } + if (Predicate.isAtomicOrderingAcquire()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Acquire"); + continue; + } + if (Predicate.isAtomicOrderingRelease()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("Release"); + continue; + } + if (Predicate.isAtomicOrderingAcquireRelease()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "AcquireRelease"); + continue; + } + if (Predicate.isAtomicOrderingSequentiallyConsistent()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "SequentiallyConsistent"); + continue; + } + + if (Predicate.isAtomicOrderingAcquireOrStronger()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "Acquire", AtomicOrderingMMOPredicateMatcher::AO_OrStronger); + continue; + } + if (Predicate.isAtomicOrderingWeakerThanAcquire()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "Acquire", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan); + continue; + } + + if (Predicate.isAtomicOrderingReleaseOrStronger()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "Release", AtomicOrderingMMOPredicateMatcher::AO_OrStronger); + continue; + } + if (Predicate.isAtomicOrderingWeakerThanRelease()) { + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>( + "Release", AtomicOrderingMMOPredicateMatcher::AO_WeakerThan); + continue; + } + } + + return failedImport("Src pattern child has predicate (" + + explainPredicates(Src) + ")"); } + if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic")) + InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic"); if (Src->isLeaf()) { Init *SrcInit = Src->getLeafValue(); if (IntInit *SrcIntInit = dyn_cast<IntInit>(SrcInit)) { - OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); + OperandMatcher &OM = + InsnMatcher.addOperand(OpIdx++, Src->getName(), TempOpIdx); OM.addPredicate<LiteralIntOperandMatcher>(SrcIntInit->getValue()); } else return failedImport( @@ -1529,13 +2798,29 @@ GlobalISelEmitter::createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, } else { assert(SrcGIOrNull && "Expected to have already found an equivalent Instruction"); + if (SrcGIOrNull->TheDef->getName() == "G_CONSTANT" || + SrcGIOrNull->TheDef->getName() == "G_FCONSTANT") { + // imm/fpimm still have operands but we don't need to do anything with it + // here since we don't support ImmLeaf predicates yet. However, we still + // need to note the hidden operand to get GIM_CheckNumOperands correct. + InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); + return InsnMatcher; + } + // Match the used operands (i.e. the children of the operator). for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { TreePatternNode *SrcChild = Src->getChild(i); - // For G_INTRINSIC, the operand immediately following the defs is an - // intrinsic ID. - if (SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" && i == 0) { + // SelectionDAG allows pointers to be represented with iN since it doesn't + // distinguish between pointers and integers but they are different types in GlobalISel. + // Coerce integers to pointers to address space 0 if the context indicates a pointer. 
+ bool OperandIsAPointer = SrcGIOrNull->isOperandAPointer(i); + + // For G_INTRINSIC/G_INTRINSIC_W_SIDE_EFFECTS, the operand immediately + // following the defs is an intrinsic ID. + if ((SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" || + SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS") && + i == 0) { if (const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP)) { OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx); @@ -1547,7 +2832,8 @@ GlobalISelEmitter::createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, } if (auto Error = - importChildMatcher(InsnMatcher, SrcChild, OpIdx++, TempOpIdx)) + importChildMatcher(Rule, InsnMatcher, SrcChild, OperandIsAPointer, + OpIdx++, TempOpIdx)) return std::move(Error); } } @@ -1555,18 +2841,30 @@ GlobalISelEmitter::createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, return InsnMatcher; } -Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, +Error GlobalISelEmitter::importComplexPatternOperandMatcher( + OperandMatcher &OM, Record *R, unsigned &TempOpIdx) const { + const auto &ComplexPattern = ComplexPatternEquivs.find(R); + if (ComplexPattern == ComplexPatternEquivs.end()) + return failedImport("SelectionDAG ComplexPattern (" + R->getName() + + ") not mapped to GlobalISel"); + + OM.addPredicate<ComplexPatternOperandMatcher>(OM, *ComplexPattern->second); + TempOpIdx++; + return Error::success(); +} + +Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule, + InstructionMatcher &InsnMatcher, const TreePatternNode *SrcChild, + bool OperandIsAPointer, unsigned OpIdx, unsigned &TempOpIdx) const { OperandMatcher &OM = InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx); + if (OM.isSameAsAnotherOperand()) + return Error::success(); - if (SrcChild->hasAnyPredicate()) - return failedImport("Src pattern child has predicate (" + - explainPredicates(SrcChild) + ")"); - - ArrayRef<EEVT::TypeSet> ChildTypes = SrcChild->getExtTypes(); + ArrayRef<TypeSetByHwMode> ChildTypes = SrcChild->getExtTypes(); if (ChildTypes.size() != 1) return failedImport("Src pattern child has multiple results"); @@ -1581,24 +2879,55 @@ Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, } } - auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete()); - if (!OpTyOrNone) - return failedImport("Src operand has an unsupported type (" + to_string(*SrcChild) + ")"); - OM.addPredicate<LLTOperandMatcher>(*OpTyOrNone); + if (auto Error = + OM.addTypeCheckPredicate(ChildTypes.front(), OperandIsAPointer)) + return failedImport(toString(std::move(Error)) + " for Src operand (" + + to_string(*SrcChild) + ")"); // Check for nested instructions. if (!SrcChild->isLeaf()) { + if (SrcChild->getOperator()->isSubClassOf("ComplexPattern")) { + // When a ComplexPattern is used as an operator, it should do the same + // thing as when used as a leaf. However, the children of the operator + // name the sub-operands that make up the complex operand and we must + // prepare to reference them in the renderer too. 
+ unsigned RendererID = TempOpIdx; + if (auto Error = importComplexPatternOperandMatcher( + OM, SrcChild->getOperator(), TempOpIdx)) + return Error; + + for (unsigned i = 0, e = SrcChild->getNumChildren(); i != e; ++i) { + auto *SubOperand = SrcChild->getChild(i); + if (!SubOperand->getName().empty()) + Rule.defineComplexSubOperand(SubOperand->getName(), + SrcChild->getOperator(), RendererID, i); + } + + return Error::success(); + } + + auto MaybeInsnOperand = OM.addPredicate<InstructionOperandMatcher>( + InsnMatcher.getRuleMatcher(), SrcChild->getName()); + if (!MaybeInsnOperand.hasValue()) { + // This isn't strictly true. If the user were to provide exactly the same + // matchers as the original operand then we could allow it. However, it's + // simpler to not permit the redundant specification. + return failedImport("Nested instruction cannot be the same as another operand"); + } + // Map the node to a gMIR instruction. - InstructionOperandMatcher &InsnOperand = - OM.addPredicate<InstructionOperandMatcher>(); + InstructionOperandMatcher &InsnOperand = **MaybeInsnOperand; auto InsnMatcherOrError = createAndImportSelDAGMatcher( - InsnOperand.getInsnMatcher(), SrcChild, TempOpIdx); + Rule, InsnOperand.getInsnMatcher(), SrcChild, TempOpIdx); if (auto Error = InsnMatcherOrError.takeError()) return Error; return Error::success(); } + if (SrcChild->hasAnyPredicate()) + return failedImport("Src pattern child has unsupported predicate"); + // Check for constant immediates. if (auto *ChildInt = dyn_cast<IntInit>(SrcChild->getLeafValue())) { OM.addPredicate<ConstantIntOperandMatcher>(ChildInt->getValue()); @@ -1617,19 +2946,17 @@ Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, return Error::success(); } - // Check for ComplexPattern's. - if (ChildRec->isSubClassOf("ComplexPattern")) { - const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec); - if (ComplexPattern == ComplexPatternEquivs.end()) - return failedImport("SelectionDAG ComplexPattern (" + - ChildRec->getName() + ") not mapped to GlobalISel"); - - OM.addPredicate<ComplexPatternOperandMatcher>(OM, - *ComplexPattern->second); - TempOpIdx++; + // Check for ValueType. + if (ChildRec->isSubClassOf("ValueType")) { + // We already added a type check as standard practice so this doesn't need + // to do anything. return Error::success(); } + // Check for ComplexPattern's. + if (ChildRec->isSubClassOf("ComplexPattern")) + return importComplexPatternOperandMatcher(OM, ChildRec, TempOpIdx); + if (ChildRec->isSubClassOf("ImmLeaf")) { return failedImport( "Src pattern child def is an unsupported tablegen class (ImmLeaf)"); @@ -1642,49 +2969,112 @@ Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, return failedImport("Src pattern child is an unsupported kind"); } -Error GlobalISelEmitter::importExplicitUseRenderer( - BuildMIAction &DstMIBuilder, TreePatternNode *DstChild, - const InstructionMatcher &InsnMatcher) const { - // The only non-leaf child we accept is 'bb': it's an operator because - // BasicBlockSDNode isn't inline, but in MI it's just another operand. 
+Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer( + action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder, + TreePatternNode *DstChild) { + if (DstChild->getTransformFn() != nullptr) { + return failedImport("Dst pattern child has transform fn " + + DstChild->getTransformFn()->getName()); + } + + const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName()); + if (SubOperand.hasValue()) { + DstMIBuilder.addRenderer<RenderComplexPatternOperand>( + *std::get<0>(*SubOperand), DstChild->getName(), + std::get<1>(*SubOperand), std::get<2>(*SubOperand)); + return InsertPt; + } + if (!DstChild->isLeaf()) { + // We accept 'bb' here. It's an operator because BasicBlockSDNode isn't + // inline, but in MI it's just another operand. if (DstChild->getOperator()->isSubClassOf("SDNode")) { auto &ChildSDNI = CGP.getSDNodeInfo(DstChild->getOperator()); if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") { - DstMIBuilder.addRenderer<CopyRenderer>(0, InsnMatcher, - DstChild->getName()); - return Error::success(); + DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName()); + return InsertPt; } } - return failedImport("Dst pattern child isn't a leaf node or an MBB"); + + // Similarly, imm is an operator in TreePatternNode's view but must be + // rendered as operands. + // FIXME: The target should be able to choose sign-extended when appropriate + // (e.g. on Mips). + if (DstChild->getOperator()->getName() == "imm") { + DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName()); + return InsertPt; + } else if (DstChild->getOperator()->getName() == "fpimm") { + DstMIBuilder.addRenderer<CopyFConstantAsFPImmRenderer>( + DstChild->getName()); + return InsertPt; + } + + if (DstChild->getOperator()->isSubClassOf("Instruction")) { + ArrayRef<TypeSetByHwMode> ChildTypes = DstChild->getExtTypes(); + if (ChildTypes.size() != 1) + return failedImport("Dst pattern child has multiple results"); + + Optional<LLTCodeGen> OpTyOrNone = None; + if (ChildTypes.front().isMachineValueType()) + OpTyOrNone = + MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy); + if (!OpTyOrNone) + return failedImport("Dst operand has an unsupported type"); + + unsigned TempRegID = Rule.allocateTempRegID(); + InsertPt = Rule.insertAction<MakeTempRegisterAction>( + InsertPt, OpTyOrNone.getValue(), TempRegID); + DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID); + + auto InsertPtOrError = createAndImportSubInstructionRenderer( + ++InsertPt, Rule, DstChild, TempRegID); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + return InsertPtOrError.get(); + } + + return failedImport("Dst pattern child isn't a leaf node or an MBB" + llvm::to_string(*DstChild)); } - // Otherwise, we're looking for a bog-standard RegisterClass operand. - if (DstChild->hasAnyPredicate()) - return failedImport("Dst pattern child has predicate (" + - explainPredicates(DstChild) + ")"); + // It could be a specific immediate in which case we should just check for + // that immediate. + if (const IntInit *ChildIntInit = + dyn_cast<IntInit>(DstChild->getLeafValue())) { + DstMIBuilder.addRenderer<ImmRenderer>(ChildIntInit->getValue()); + return InsertPt; + } + // Otherwise, we're looking for a bog-standard RegisterClass operand. 
if (auto *ChildDefInit = dyn_cast<DefInit>(DstChild->getLeafValue())) { auto *ChildRec = ChildDefInit->getDef(); - ArrayRef<EEVT::TypeSet> ChildTypes = DstChild->getExtTypes(); + ArrayRef<TypeSetByHwMode> ChildTypes = DstChild->getExtTypes(); if (ChildTypes.size() != 1) return failedImport("Dst pattern child has multiple results"); - auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete()); + Optional<LLTCodeGen> OpTyOrNone = None; + if (ChildTypes.front().isMachineValueType()) + OpTyOrNone = MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy); if (!OpTyOrNone) return failedImport("Dst operand has an unsupported type"); if (ChildRec->isSubClassOf("Register")) { - DstMIBuilder.addRenderer<AddRegisterRenderer>(0, ChildRec); - return Error::success(); + DstMIBuilder.addRenderer<AddRegisterRenderer>(ChildRec); + return InsertPt; } if (ChildRec->isSubClassOf("RegisterClass") || - ChildRec->isSubClassOf("RegisterOperand")) { - DstMIBuilder.addRenderer<CopyRenderer>(0, InsnMatcher, - DstChild->getName()); - return Error::success(); + ChildRec->isSubClassOf("RegisterOperand") || + ChildRec->isSubClassOf("ValueType")) { + if (ChildRec->isSubClassOf("RegisterOperand") && + !ChildRec->isValueUnset("GIZeroRegister")) { + DstMIBuilder.addRenderer<CopyOrAddZeroRegRenderer>( + DstChild->getName(), ChildRec->getValueAsDef("GIZeroRegister")); + return InsertPt; + } + + DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName()); + return InsertPt; } if (ChildRec->isSubClassOf("ComplexPattern")) { @@ -1693,11 +3083,11 @@ Error GlobalISelEmitter::importExplicitUseRenderer( return failedImport( "SelectionDAG ComplexPattern not mapped to GlobalISel"); - const OperandMatcher &OM = InsnMatcher.getOperand(DstChild->getName()); + const OperandMatcher &OM = Rule.getOperandMatcher(DstChild->getName()); DstMIBuilder.addRenderer<RenderComplexPatternOperand>( - 0, *ComplexPattern->second, DstChild->getName(), + *ComplexPattern->second, DstChild->getName(), OM.getAllocatedTemporariesBaseID()); - return Error::success(); + return InsertPt; } if (ChildRec->isSubClassOf("SDNodeXForm")) @@ -1712,8 +3102,50 @@ Error GlobalISelEmitter::importExplicitUseRenderer( } Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( - RuleMatcher &M, const TreePatternNode *Dst, - const InstructionMatcher &InsnMatcher) { + RuleMatcher &M, const TreePatternNode *Dst) { + auto InsertPtOrError = createInstructionRenderer(M.actions_end(), M, Dst); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + + action_iterator InsertPt = InsertPtOrError.get(); + BuildMIAction &DstMIBuilder = *static_cast<BuildMIAction *>(InsertPt->get()); + + importExplicitDefRenderers(DstMIBuilder); + + if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst) + .takeError()) + return std::move(Error); + + return DstMIBuilder; +} + +Expected<action_iterator> +GlobalISelEmitter::createAndImportSubInstructionRenderer( + action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst, + unsigned TempRegID) { + auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst); + + // TODO: Assert there's exactly one result. + + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + InsertPt = InsertPtOrError.get(); + + BuildMIAction &DstMIBuilder = + *static_cast<BuildMIAction *>(InsertPtOrError.get()->get()); + + // Assign the result to TempReg. 
+ DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true); + + InsertPtOrError = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst); + if (auto Error = InsertPtOrError.takeError()) + return std::move(Error); + + return InsertPtOrError.get(); +} + +Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer( + action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst) { Record *DstOp = Dst->getOperator(); if (!DstOp->isSubClassOf("Instruction")) { if (DstOp->isSubClassOf("ValueType")) @@ -1723,38 +3155,47 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( } CodeGenInstruction *DstI = &Target.getInstruction(DstOp); - unsigned DstINumUses = DstI->Operands.size() - DstI->Operands.NumDefs; - unsigned ExpectedDstINumUses = Dst->getNumChildren(); - bool IsExtractSubReg = false; - // COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy. - if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") { + if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") DstI = &Target.getInstruction(RK.getDef("COPY")); - DstINumUses--; // Ignore the class constraint. - ExpectedDstINumUses--; - } else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") { + else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") DstI = &Target.getInstruction(RK.getDef("COPY")); - IsExtractSubReg = true; - } + else if (DstI->TheDef->getName() == "REG_SEQUENCE") + return failedImport("Unable to emit REG_SEQUENCE"); - auto &DstMIBuilder = M.addAction<BuildMIAction>(0, DstI, InsnMatcher); + return M.insertAction<BuildMIAction>(InsertPt, M.allocateOutputInsnID(), + DstI); +} - // Render the explicit defs. +void GlobalISelEmitter::importExplicitDefRenderers( + BuildMIAction &DstMIBuilder) { + const CodeGenInstruction *DstI = DstMIBuilder.getCGI(); for (unsigned I = 0; I < DstI->Operands.NumDefs; ++I) { const CGIOperandList::OperandInfo &DstIOperand = DstI->Operands[I]; - DstMIBuilder.addRenderer<CopyRenderer>(0, InsnMatcher, DstIOperand.Name); + DstMIBuilder.addRenderer<CopyRenderer>(DstIOperand.Name); } +} + +Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers( + action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder, + const llvm::TreePatternNode *Dst) { + const CodeGenInstruction *DstI = DstMIBuilder.getCGI(); + CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator()); // EXTRACT_SUBREG needs to use a subregister COPY. 
- if (IsExtractSubReg) { + if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") { if (!Dst->getChild(0)->isLeaf()) return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); if (DefInit *SubRegInit = dyn_cast<DefInit>(Dst->getChild(1)->getLeafValue())) { - CodeGenRegisterClass *RC = CGRegs.getRegClass( - getInitValueAsRegClass(Dst->getChild(0)->getLeafValue())); + Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); + if (!RCDef) + return failedImport("EXTRACT_SUBREG child #0 could not " + "be coerced to a register class"); + + CodeGenRegisterClass *RC = CGRegs.getRegClass(RCDef); CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); const auto &SrcRCDstRCPair = @@ -1765,15 +3206,22 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( return failedImport("EXTRACT_SUBREG requires an additional COPY"); } - DstMIBuilder.addRenderer<CopySubRegRenderer>( - 0, InsnMatcher, Dst->getChild(0)->getName(), SubIdx); - return DstMIBuilder; + DstMIBuilder.addRenderer<CopySubRegRenderer>(Dst->getChild(0)->getName(), + SubIdx); + return InsertPt; } return failedImport("EXTRACT_SUBREG child #1 is not a subreg index"); } // Render the explicit uses. + unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs; + unsigned ExpectedDstINumUses = Dst->getNumChildren(); + if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") { + DstINumUses--; // Ignore the class constraint. + ExpectedDstINumUses--; + } + unsigned Child = 0; unsigned NumDefaultOps = 0; for (unsigned I = 0; I != DstINumUses; ++I) { @@ -1793,9 +3241,11 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( continue; } - if (auto Error = importExplicitUseRenderer( - DstMIBuilder, Dst->getChild(Child), InsnMatcher)) + auto InsertPtOrError = importExplicitUseRenderer(InsertPt, M, DstMIBuilder, + Dst->getChild(Child)); + if (auto Error = InsertPtOrError.takeError()) return std::move(Error); + InsertPt = InsertPtOrError.get(); ++Child; } @@ -1806,7 +3256,7 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( " explicit ones and " + llvm::to_string(NumDefaultOps) + " default ones"); - return DstMIBuilder; + return InsertPt; } Error GlobalISelEmitter::importDefaultOperandRenderers( @@ -1822,12 +3272,12 @@ Error GlobalISelEmitter::importDefaultOperandRenderers( } if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) { - DstMIBuilder.addRenderer<AddRegisterRenderer>(0, DefaultDefOp->getDef()); + DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef()); continue; } if (const IntInit *DefaultIntOp = dyn_cast<IntInit>(DefaultOp)) { - DstMIBuilder.addRenderer<ImmRenderer>(0, DefaultIntOp->getValue()); + DstMIBuilder.addRenderer<ImmRenderer>(DefaultIntOp->getValue()); continue; } @@ -1847,10 +3297,12 @@ Error GlobalISelEmitter::importImplicitDefRenderers( Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { // Keep track of the matchers and actions to emit. - RuleMatcher M; - M.addAction<DebugCommentAction>(P); + RuleMatcher M(P.getSrcRecord()->getLoc()); + M.addAction<DebugCommentAction>(llvm::to_string(*P.getSrcPattern()) + + " => " + + llvm::to_string(*P.getDstPattern())); - if (auto Error = importRulePredicates(M, P.getPredicates()->getValues())) + if (auto Error = importRulePredicates(M, P.getPredicates())) return std::move(Error); // Next, analyze the pattern operators. 
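// For orientation, a sketch of the table entry each imported rule now expands
// to. The opcode names are the ones used by RuleMatcher::emit and the
// MatchAction subclasses in this patch; the operand values and the label name
// are placeholders, and which opcodes appear depends on the rule.
//
//   GIM_Try, /*On fail goto*/ Label_N,
//     GIM_CheckFeatures, GIFBS_...,              // only if features are required
//     GIM_RecordInsn, ...,                       // capture opcodes
//     ...                                        // predicate opcodes per matcher
//     GIR_BuildMI, ... or GIR_MutateOpcode, ..., // actions and renderers
//     GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
//     GIR_Coverage, RuleID,                      // only when coverage is enabled
//     GIR_Done,
//   Label_N: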
@@ -1865,8 +3317,68 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
 return failedImport("Src pattern root isn't a trivial operator (" +
 toString(std::move(Err)) + ")");
- if (Dst->isLeaf())
+ // The different predicates and matchers created during
+ // addInstructionMatcher use the RuleMatcher M to set up their
+ // instruction ID (InsnVarID) that is going to be used when
+ // M is going to be emitted.
+ // However, the code doing the emission still relies on the IDs
+ // returned during that process by the RuleMatcher when issuing
+ // the recordInsn opcodes.
+ // Because of that:
+ // 1. The order in which we created the predicates
+ // and such must be the same as the order in which we emit them,
+ // and
+ // 2. We need to reset the generation of the IDs in M somewhere between
+ // addInstructionMatcher and emit
+ //
+ // FIXME: Long term, we don't want to have to rely on this implicit
+ // naming being the same. One possible solution would be to have an
+ // explicit operator for operation capture and reference those.
+ // The plus side is that it would expose opportunities to share
+ // the capture across rules. The downside is that it would
+ // introduce a dependency between predicates (captures must happen
+ // before their first use.)
+ InstructionMatcher &InsnMatcherTemp = M.addInstructionMatcher(Src->getName());
+ unsigned TempOpIdx = 0;
+ auto InsnMatcherOrError =
+ createAndImportSelDAGMatcher(M, InsnMatcherTemp, Src, TempOpIdx);
+ // Reset the ID generation so that the emitted IDs match the ones
+ // in the InstructionMatcher and such.
+ M.clearImplicitMap();
+ if (auto Error = InsnMatcherOrError.takeError())
+ return std::move(Error);
+ InstructionMatcher &InsnMatcher = InsnMatcherOrError.get();
+
+ if (Dst->isLeaf()) {
+ Record *RCDef = getInitValueAsRegClass(Dst->getLeafValue());
+
+ if (RCDef) {
+ const CodeGenRegisterClass &RC = Target.getRegisterClass(RCDef);
+ // We need to replace the def and all its uses with the specified
+ // operand. However, we must also insert COPYs wherever needed.
+ // For now, emit a copy and let the register allocator clean up.
+ auto &DstI = Target.getInstruction(RK.getDef("COPY"));
+ const auto &DstIOperand = DstI.Operands[0];
+
+ OperandMatcher &OM0 = InsnMatcher.getOperand(0);
+ OM0.setSymbolicName(DstIOperand.Name);
+ M.defineOperand(OM0.getSymbolicName(), OM0);
+ OM0.addPredicate<RegisterBankOperandMatcher>(RC);
+
+ auto &DstMIBuilder =
+ M.addAction<BuildMIAction>(M.allocateOutputInsnID(), &DstI);
+ DstMIBuilder.addRenderer<CopyRenderer>(DstIOperand.Name);
+ DstMIBuilder.addRenderer<CopyRenderer>(Dst->getName());
+ M.addAction<ConstrainOperandToRegClassAction>(0, 0, RC);
+
+ // We're done with this pattern! It's eligible for GISel emission; return
+ // it.
+ ++NumPatternImported;
+ return std::move(M);
+ }
+ return failedImport("Dst pattern root isn't a known leaf");
+ }
 // Start with the defined operands (i.e., the results of the root operator).
Record *DstOp = Dst->getOperator(); @@ -1879,19 +3391,11 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { to_string(Src->getExtTypes().size()) + " def(s) vs " + to_string(DstI.Operands.NumDefs) + " def(s))"); - InstructionMatcher &InsnMatcherTemp = M.addInstructionMatcher(); - unsigned TempOpIdx = 0; - auto InsnMatcherOrError = - createAndImportSelDAGMatcher(InsnMatcherTemp, Src, TempOpIdx); - if (auto Error = InsnMatcherOrError.takeError()) - return std::move(Error); - InstructionMatcher &InsnMatcher = InsnMatcherOrError.get(); - // The root of the match also has constraints on the register bank so that it // matches the result instruction. unsigned OpIdx = 0; - for (const EEVT::TypeSet &Ty : Src->getExtTypes()) { - (void)Ty; + for (const TypeSetByHwMode &VTy : Src->getExtTypes()) { + (void)VTy; const auto &DstIOperand = DstI.Operands[OpIdx]; Record *DstIOpRec = DstIOperand.Rec; @@ -1920,13 +3424,13 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); OM.setSymbolicName(DstIOperand.Name); + M.defineOperand(OM.getSymbolicName(), OM); OM.addPredicate<RegisterBankOperandMatcher>( Target.getRegisterClass(DstIOpRec)); ++OpIdx; } - auto DstMIBuilderOrError = - createAndImportInstructionRenderer(M, Dst, InsnMatcher); + auto DstMIBuilderOrError = createAndImportInstructionRenderer(M, Dst); if (auto Error = DstMIBuilderOrError.takeError()) return std::move(Error); BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get(); @@ -1936,6 +3440,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { if (auto Error = importImplicitDefRenderers(DstMIBuilder, P.getDstRegs())) return std::move(Error); + DstMIBuilder.chooseInsnToMutate(M); + // Constrain the registers to classes. This is normally derived from the // emitted instruction but a few instructions require special handling. if (DstI.TheDef->getName() == "COPY_TO_REGCLASS") { @@ -2006,7 +3512,92 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return std::move(M); } +// Emit imm predicate table and an enum to reference them with. +// The 'Predicate_' part of the name is redundant but eliminating it is more +// trouble than it's worth. 
+void GlobalISelEmitter::emitImmPredicates( + raw_ostream &OS, StringRef TypeIdentifier, StringRef Type, + std::function<bool(const Record *R)> Filter) { + std::vector<const Record *> MatchedRecords; + const auto &Defs = RK.getAllDerivedDefinitions("PatFrag"); + std::copy_if(Defs.begin(), Defs.end(), std::back_inserter(MatchedRecords), + [&](Record *Record) { + return !Record->getValueAsString("ImmediateCode").empty() && + Filter(Record); + }); + + if (!MatchedRecords.empty()) { + OS << "// PatFrag predicates.\n" + << "enum {\n"; + std::string EnumeratorSeparator = + (" = GIPFP_" + TypeIdentifier + "_Invalid + 1,\n").str(); + for (const auto *Record : MatchedRecords) { + OS << " GIPFP_" << TypeIdentifier << "_Predicate_" << Record->getName() + << EnumeratorSeparator; + EnumeratorSeparator = ",\n"; + } + OS << "};\n"; + } + + for (const auto *Record : MatchedRecords) + OS << "static bool Predicate_" << Record->getName() << "(" << Type + << " Imm) {" << Record->getValueAsString("ImmediateCode") << "}\n"; + + OS << "static InstructionSelector::" << TypeIdentifier + << "ImmediatePredicateFn " << TypeIdentifier << "ImmPredicateFns[] = {\n" + << " nullptr,\n"; + for (const auto *Record : MatchedRecords) + OS << " Predicate_" << Record->getName() << ",\n"; + OS << "};\n"; +} + +std::vector<Matcher *> GlobalISelEmitter::optimizeRules( + std::vector<RuleMatcher> &Rules, + std::vector<std::unique_ptr<GroupMatcher>> &StorageGroupMatcher) { + std::vector<Matcher *> OptRules; + // Start with a stupid grouping for now. + std::unique_ptr<GroupMatcher> CurrentGroup = make_unique<GroupMatcher>(); + assert(CurrentGroup->conditions_empty()); + unsigned NbGroup = 0; + for (RuleMatcher &Rule : Rules) { + std::unique_ptr<PredicateMatcher> Predicate = Rule.forgetFirstCondition(); + if (!CurrentGroup->conditions_empty() && + !CurrentGroup->lastConditionMatches(*Predicate)) { + // Start a new group. + ++NbGroup; + OptRules.push_back(CurrentGroup.get()); + StorageGroupMatcher.emplace_back(std::move(CurrentGroup)); + CurrentGroup = make_unique<GroupMatcher>(); + assert(CurrentGroup->conditions_empty()); + } + if (CurrentGroup->conditions_empty()) + CurrentGroup->addCondition(std::move(Predicate)); + CurrentGroup->addRule(Rule); + } + if (!CurrentGroup->conditions_empty()) { + ++NbGroup; + OptRules.push_back(CurrentGroup.get()); + StorageGroupMatcher.emplace_back(std::move(CurrentGroup)); + } + DEBUG(dbgs() << "NbGroup: " << NbGroup << "\n"); + return OptRules; +} + void GlobalISelEmitter::run(raw_ostream &OS) { + if (!UseCoverageFile.empty()) { + RuleCoverage = CodeGenCoverage(); + auto RuleCoverageBufOrErr = MemoryBuffer::getFile(UseCoverageFile); + if (!RuleCoverageBufOrErr) { + PrintWarning(SMLoc(), "Missing rule coverage data"); + RuleCoverage = None; + } else { + if (!RuleCoverage->parse(*RuleCoverageBufOrErr.get(), Target.getName())) { + PrintWarning(SMLoc(), "Ignoring invalid or missing rule coverage data"); + RuleCoverage = None; + } + } + } + // Track the GINodeEquiv definitions. gatherNodeEquivs(); @@ -2016,6 +3607,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) { // Look through the SelectionDAG patterns we found, possibly emitting some. for (const PatternToMatch &Pat : CGP.ptms()) { ++NumPatternTotal; + auto MatcherOrErr = runOnPattern(Pat); // The pattern analysis can fail, indicating an unsupported pattern. 
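// A sketch of the C++ that the emitImmPredicates helper above produces for the
// I64 case; the PatFrag name and predicate body are hypothetical, but the
// enum/function/table structure follows the code in this hunk.
//
//   // PatFrag predicates.
//   enum {
//     GIPFP_I64_Predicate_simm8 = GIPFP_I64_Invalid + 1,
//   };
//   static bool Predicate_simm8(int64_t Imm) { return Imm >= -128 && Imm < 128; }
//   static InstructionSelector::I64ImmediatePredicateFn I64ImmPredicateFns[] = {
//     nullptr,
//     Predicate_simm8,
//   };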
@@ -2031,20 +3623,16 @@ void GlobalISelEmitter::run(raw_ostream &OS) { continue; } + if (RuleCoverage) { + if (RuleCoverage->isCovered(MatcherOrErr->getRuleID())) + ++NumPatternsTested; + else + PrintWarning(Pat.getSrcRecord()->getLoc(), + "Pattern is not covered by a test"); + } Rules.push_back(std::move(MatcherOrErr.get())); } - std::stable_sort(Rules.begin(), Rules.end(), - [&](const RuleMatcher &A, const RuleMatcher &B) { - if (A.isHigherPriorityThan(B)) { - assert(!B.isHigherPriorityThan(A) && "Cannot be more important " - "and less important at " - "the same time"); - return true; - } - return false; - }); - std::vector<Record *> ComplexPredicates = RK.getAllDerivedDefinitions("GIComplexOperandMatcher"); std::sort(ComplexPredicates.begin(), ComplexPredicates.end(), @@ -2067,22 +3655,19 @@ void GlobalISelEmitter::run(raw_ostream &OS) { OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n" << " mutable MatcherState State;\n" << " typedef " - "ComplexRendererFn(" + "ComplexRendererFns(" << Target.getName() << "InstructionSelector::*ComplexMatcherMemFn)(MachineOperand &) const;\n" - << "const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> " + << " const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> " "MatcherInfo;\n" + << " static " << Target.getName() + << "InstructionSelector::ComplexMatcherMemFn ComplexPredicateFns[];\n" << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n"; OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n" << ", State(" << MaxTemporaries << "),\n" - << "MatcherInfo({TypeObjects, FeatureBitsets, {\n" - << " nullptr, // GICP_Invalid\n"; - for (const auto &Record : ComplexPredicates) - OS << " &" << Target.getName() - << "InstructionSelector::" << Record->getValueAsString("MatcherFn") - << ", // " << Record->getName() << "\n"; - OS << "}})\n" + << "MatcherInfo({TypeObjects, FeatureBitsets, I64ImmPredicateFns, " + "APIntImmPredicateFns, APFloatImmPredicateFns, ComplexPredicateFns})\n" << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n"; OS << "#ifdef GET_GLOBALISEL_IMPL\n"; @@ -2113,18 +3698,12 @@ void GlobalISelEmitter::run(raw_ostream &OS) { // Emit a table containing the LLT objects needed by the matcher and an enum // for the matcher to reference them with. 
- std::vector<LLTCodeGen> TypeObjects = { - LLT::scalar(8), LLT::scalar(16), LLT::scalar(32), - LLT::scalar(64), LLT::scalar(80), LLT::vector(8, 1), - LLT::vector(16, 1), LLT::vector(32, 1), LLT::vector(64, 1), - LLT::vector(8, 8), LLT::vector(16, 8), LLT::vector(32, 8), - LLT::vector(64, 8), LLT::vector(4, 16), LLT::vector(8, 16), - LLT::vector(16, 16), LLT::vector(32, 16), LLT::vector(2, 32), - LLT::vector(4, 32), LLT::vector(8, 32), LLT::vector(16, 32), - LLT::vector(2, 64), LLT::vector(4, 64), LLT::vector(8, 64), - }; + std::vector<LLTCodeGen> TypeObjects; + for (const auto &Ty : LLTOperandMatcher::KnownTypes) + TypeObjects.push_back(Ty); std::sort(TypeObjects.begin(), TypeObjects.end()); - OS << "enum {\n"; + OS << "// LLT Objects.\n" + << "enum {\n"; for (const auto &TypeObject : TypeObjects) { OS << " "; TypeObject.emitCxxEnumValue(OS); @@ -2162,7 +3741,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) { FeatureBitsets.erase( std::unique(FeatureBitsets.begin(), FeatureBitsets.end()), FeatureBitsets.end()); - OS << "enum {\n" + OS << "// Feature bitsets.\n" + << "enum {\n" << " GIFBS_Invalid,\n"; for (const auto &FeatureBitset : FeatureBitsets) { if (FeatureBitset.empty()) @@ -2186,15 +3766,40 @@ void GlobalISelEmitter::run(raw_ostream &OS) { OS << "};\n\n"; // Emit complex predicate table and an enum to reference them with. - OS << "enum {\n" + OS << "// ComplexPattern predicates.\n" + << "enum {\n" << " GICP_Invalid,\n"; for (const auto &Record : ComplexPredicates) OS << " GICP_" << Record->getName() << ",\n"; OS << "};\n" << "// See constructor for table contents\n\n"; + emitImmPredicates(OS, "I64", "int64_t", [](const Record *R) { + bool Unset; + return !R->getValueAsBitOrUnset("IsAPFloat", Unset) && + !R->getValueAsBit("IsAPInt"); + }); + emitImmPredicates(OS, "APFloat", "const APFloat &", [](const Record *R) { + bool Unset; + return R->getValueAsBitOrUnset("IsAPFloat", Unset); + }); + emitImmPredicates(OS, "APInt", "const APInt &", [](const Record *R) { + return R->getValueAsBit("IsAPInt"); + }); + OS << "\n"; + + OS << Target.getName() << "InstructionSelector::ComplexMatcherMemFn\n" + << Target.getName() << "InstructionSelector::ComplexPredicateFns[] = {\n" + << " nullptr, // GICP_Invalid\n"; + for (const auto &Record : ComplexPredicates) + OS << " &" << Target.getName() + << "InstructionSelector::" << Record->getValueAsString("MatcherFn") + << ", // " << Record->getName() << "\n"; + OS << "};\n\n"; + OS << "bool " << Target.getName() - << "InstructionSelector::selectImpl(MachineInstr &I) const {\n" + << "InstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage " + "&CoverageInfo) const {\n" << " MachineFunction &MF = *I.getParent()->getParent();\n" << " MachineRegisterInfo &MRI = MF.getRegInfo();\n" << " // FIXME: This should be computed on a per-function basis rather " @@ -2206,13 +3811,37 @@ void GlobalISelEmitter::run(raw_ostream &OS) { << " State.MIs.clear();\n" << " State.MIs.push_back(&I);\n\n"; - for (auto &Rule : Rules) { - Rule.emit(OS); - ++CurrentMatchTableID; + std::stable_sort(Rules.begin(), Rules.end(), [&](const RuleMatcher &A, + const RuleMatcher &B) { + if (A.isHigherPriorityThan(B)) { + assert(!B.isHigherPriorityThan(A) && "Cannot be more important " + "and less important at " + "the same time"); + return true; + } + return false; + }); + std::vector<std::unique_ptr<GroupMatcher>> StorageGroupMatcher; + + std::vector<Matcher *> OptRules; + if (OptimizeMatchTable) + OptRules = optimizeRules(Rules, StorageGroupMatcher); + else + for (Matcher &Rule : 
Rules) + OptRules.push_back(&Rule); + + MatchTable Table(0); + for (Matcher *Rule : OptRules) { + Rule->emit(Table); ++NumPatternEmitted; - assert(CurrentMatchTableID == NumPatternEmitted && - "Statistic deviates from number of emitted tables"); } + Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak; + Table.emitDeclaration(OS); + OS << " if (executeMatchTable(*this, OutMIs, State, MatcherInfo, "; + Table.emitUse(OS); + OS << ", TII, MRI, TRI, RBI, AvailableFeatures, CoverageInfo)) {\n" + << " return true;\n" + << " }\n\n"; OS << " return false;\n" << "}\n" @@ -2245,6 +3874,138 @@ void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) { Predicate, SubtargetFeatureInfo(Predicate, SubtargetFeatures.size())); } +TreePatternNode *GlobalISelEmitter::fixupPatternNode(TreePatternNode *N) { + if (!N->isLeaf()) { + for (unsigned I = 0, E = N->getNumChildren(); I < E; ++I) { + TreePatternNode *OrigChild = N->getChild(I); + TreePatternNode *NewChild = fixupPatternNode(OrigChild); + if (OrigChild != NewChild) + N->setChild(I, NewChild); + } + + if (N->getOperator()->getName() == "ld") { + // If it's a signext-load we need to adapt the pattern slightly. We need + // to split the node into (sext (ld ...)), remove the <<signext>> predicate, + // and then apply the <<signextTY>> predicate by updating the result type + // of the load. + // + // For example: + // (ld:[i32] [iPTR])<<unindexed>><<signext>><<signexti16>> + // must be transformed into: + // (sext:[i32] (ld:[i16] [iPTR])<<unindexed>>) + // + // Likewise for zeroext-load and anyext-load. + + std::vector<TreePredicateFn> Predicates; + bool IsSignExtLoad = false; + bool IsZeroExtLoad = false; + bool IsAnyExtLoad = false; + Record *MemVT = nullptr; + for (const auto &P : N->getPredicateFns()) { + if (P.isLoad() && P.isSignExtLoad()) { + IsSignExtLoad = true; + continue; + } + if (P.isLoad() && P.isZeroExtLoad()) { + IsZeroExtLoad = true; + continue; + } + if (P.isLoad() && P.isAnyExtLoad()) { + IsAnyExtLoad = true; + continue; + } + if (P.isLoad() && P.getMemoryVT()) { + MemVT = P.getMemoryVT(); + continue; + } + Predicates.push_back(P); + } + + if ((IsSignExtLoad || IsZeroExtLoad || IsAnyExtLoad) && MemVT) { + assert((IsSignExtLoad + IsZeroExtLoad + IsAnyExtLoad) == 1 && + "IsSignExtLoad, IsZeroExtLoad, IsAnyExtLoad are mutually exclusive"); + TreePatternNode *Ext = new TreePatternNode( + RK.getDef(IsSignExtLoad ? "sext" + : IsZeroExtLoad ? "zext" : "anyext"), + {N}, 1); + Ext->setType(0, N->getType(0)); + N->clearPredicateFns(); + N->setPredicateFns(Predicates); + N->setType(0, getValueType(MemVT)); + return Ext; + } + } + } + + return N; +} + +void GlobalISelEmitter::fixupPatternTrees(TreePattern *P) { + for (unsigned I = 0, E = P->getNumTrees(); I < E; ++I) { + TreePatternNode *OrigTree = P->getTree(I); + TreePatternNode *NewTree = fixupPatternNode(OrigTree); + if (OrigTree != NewTree) + P->setTree(I, NewTree); + } +} + +std::unique_ptr<PredicateMatcher> RuleMatcher::forgetFirstCondition() { + assert(!insnmatchers_empty() && + "Trying to forget something that does not exist"); + + InstructionMatcher &Matcher = insnmatchers_front(); + std::unique_ptr<PredicateMatcher> Condition; + if (!Matcher.predicates_empty()) + Condition = Matcher.predicates_pop_front(); + if (!Condition) { + // If there is no more predicate on the instruction itself, look at its + // operands. 
+  assert(!Matcher.operands_empty() &&
+         "Empty instruction should have been discarded");
+  OperandMatcher &OpMatcher = **Matcher.operands_begin();
+  assert(!OpMatcher.predicates_empty() && "no operand constraint");
+  Condition = OpMatcher.predicates_pop_front();
+  // If this operand is free of constraints, rip it off.
+  if (OpMatcher.predicates_empty())
+    Matcher.pop_front();
+  }
+  // Rip the instruction off when it is empty.
+  if (Matcher.operands_empty() && Matcher.predicates_empty())
+    insnmatchers_pop_front();
+  return Condition;
+}
+
+bool GroupMatcher::lastConditionMatches(
+    const PredicateMatcher &Predicate) const {
+  const auto &LastCondition = conditions_back();
+  return Predicate.isIdentical(*LastCondition);
+}
+
+void GroupMatcher::emit(MatchTable &Table) {
+  unsigned LabelID = Table.allocateLabelID();
+  if (!conditions_empty()) {
+    Table << MatchTable::Opcode("GIM_Try", +1)
+          << MatchTable::Comment("On fail goto")
+          << MatchTable::JumpTarget(LabelID) << MatchTable::LineBreak;
+    for (auto &Condition : Conditions)
+      Condition->emitPredicateOpcodes(
+          Table, *static_cast<RuleMatcher *>(*Rules.begin()));
+  }
+  // The shared conditions were emitted above; now emit each rule's remaining
+  // checks and actions.
+  for (const auto &Rule : Rules)
+    Rule->emit(Table);
+  // If we didn't succeed in that block, we are not going to select this
+  // instruction.
+  if (!conditions_empty()) {
+    Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
+    Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
+          << MatchTable::Label(LabelID);
+  }
+}
+
+unsigned OperandMatcher::getInsnVarID() const { return Insn.getVarID(); }
+
 } // end anonymous namespace

 //===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp
new file mode 100644
index 000000000000..d5a181e130a5
--- /dev/null
+++ b/utils/TableGen/InfoByHwMode.cpp
@@ -0,0 +1,206 @@
+//===--- InfoByHwMode.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Classes that implement data parameterized by HW modes for instruction
+// selection. Currently it is ValueTypeByHwMode (parameterized ValueType),
+// and RegSizeInfoByHwMode (parameterized register/spill size and alignment
+// data).
+//===----------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "InfoByHwMode.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <set> +#include <string> + +using namespace llvm; + +std::string llvm::getModeName(unsigned Mode) { + if (Mode == DefaultMode) + return "*"; + return (Twine('m') + Twine(Mode)).str(); +} + +ValueTypeByHwMode::ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH) { + const HwModeSelect &MS = CGH.getHwModeSelect(R); + for (const HwModeSelect::PairType &P : MS.Items) { + auto I = Map.insert({P.first, MVT(llvm::getValueType(P.second))}); + assert(I.second && "Duplicate entry?"); + (void)I; + } +} + +bool ValueTypeByHwMode::operator== (const ValueTypeByHwMode &T) const { + assert(isValid() && T.isValid() && "Invalid type in assignment"); + bool Simple = isSimple(); + if (Simple != T.isSimple()) + return false; + if (Simple) + return getSimple() == T.getSimple(); + + return Map == T.Map; +} + +bool ValueTypeByHwMode::operator< (const ValueTypeByHwMode &T) const { + assert(isValid() && T.isValid() && "Invalid type in comparison"); + // Default order for maps. + return Map < T.Map; +} + +MVT &ValueTypeByHwMode::getOrCreateTypeForMode(unsigned Mode, MVT Type) { + auto F = Map.find(Mode); + if (F != Map.end()) + return F->second; + // If Mode is not in the map, look up the default mode. If it exists, + // make a copy of it for Mode and return it. + auto D = Map.find(DefaultMode); + if (D != Map.end()) + return Map.insert(std::make_pair(Mode, D->second)).first->second; + // If default mode is not present either, use provided Type. + return Map.insert(std::make_pair(Mode, Type)).first->second; +} + +StringRef ValueTypeByHwMode::getMVTName(MVT T) { + StringRef N = llvm::getEnumName(T.SimpleTy); + N.consume_front("MVT::"); + return N; +} + +void ValueTypeByHwMode::writeToStream(raw_ostream &OS) const { + if (isSimple()) { + OS << getMVTName(getSimple()); + return; + } + + std::vector<const PairType*> Pairs; + for (const auto &P : Map) + Pairs.push_back(&P); + std::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>()); + + OS << '{'; + for (unsigned i = 0, e = Pairs.size(); i != e; ++i) { + const PairType *P = Pairs[i]; + OS << '(' << getModeName(P->first) + << ':' << getMVTName(P->second).str() << ')'; + if (i != e-1) + OS << ','; + } + OS << '}'; +} + +LLVM_DUMP_METHOD +void ValueTypeByHwMode::dump() const { + dbgs() << *this << '\n'; +} + +ValueTypeByHwMode llvm::getValueTypeByHwMode(Record *Rec, + const CodeGenHwModes &CGH) { +#ifndef NDEBUG + if (!Rec->isSubClassOf("ValueType")) + Rec->dump(); +#endif + assert(Rec->isSubClassOf("ValueType") && + "Record must be derived from ValueType"); + if (Rec->isSubClassOf("HwModeSelect")) + return ValueTypeByHwMode(Rec, CGH); + return ValueTypeByHwMode(llvm::getValueType(Rec)); +} + +RegSizeInfo::RegSizeInfo(Record *R, const CodeGenHwModes &CGH) { + RegSize = R->getValueAsInt("RegSize"); + SpillSize = R->getValueAsInt("SpillSize"); + SpillAlignment = R->getValueAsInt("SpillAlignment"); +} + +bool RegSizeInfo::operator< (const RegSizeInfo &I) const { + return std::tie(RegSize, SpillSize, SpillAlignment) < + std::tie(I.RegSize, I.SpillSize, I.SpillAlignment); +} + +bool RegSizeInfo::isSubClassOf(const RegSizeInfo &I) const { + return RegSize <= I.RegSize && + SpillAlignment && I.SpillAlignment % SpillAlignment == 0 && + SpillSize <= I.SpillSize; +} + +void 
RegSizeInfo::writeToStream(raw_ostream &OS) const { + OS << "[R=" << RegSize << ",S=" << SpillSize + << ",A=" << SpillAlignment << ']'; +} + +RegSizeInfoByHwMode::RegSizeInfoByHwMode(Record *R, + const CodeGenHwModes &CGH) { + const HwModeSelect &MS = CGH.getHwModeSelect(R); + for (const HwModeSelect::PairType &P : MS.Items) { + auto I = Map.insert({P.first, RegSizeInfo(P.second, CGH)}); + assert(I.second && "Duplicate entry?"); + (void)I; + } +} + +bool RegSizeInfoByHwMode::operator< (const RegSizeInfoByHwMode &I) const { + unsigned M0 = Map.begin()->first; + return get(M0) < I.get(M0); +} + +bool RegSizeInfoByHwMode::operator== (const RegSizeInfoByHwMode &I) const { + unsigned M0 = Map.begin()->first; + return get(M0) == I.get(M0); +} + +bool RegSizeInfoByHwMode::isSubClassOf(const RegSizeInfoByHwMode &I) const { + unsigned M0 = Map.begin()->first; + return get(M0).isSubClassOf(I.get(M0)); +} + +bool RegSizeInfoByHwMode::hasStricterSpillThan(const RegSizeInfoByHwMode &I) + const { + unsigned M0 = Map.begin()->first; + const RegSizeInfo &A0 = get(M0); + const RegSizeInfo &B0 = I.get(M0); + return std::tie(A0.SpillSize, A0.SpillAlignment) > + std::tie(B0.SpillSize, B0.SpillAlignment); +} + +void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const { + typedef typename decltype(Map)::value_type PairType; + std::vector<const PairType*> Pairs; + for (const auto &P : Map) + Pairs.push_back(&P); + std::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>()); + + OS << '{'; + for (unsigned i = 0, e = Pairs.size(); i != e; ++i) { + const PairType *P = Pairs[i]; + OS << '(' << getModeName(P->first) << ':' << P->second << ')'; + if (i != e-1) + OS << ','; + } + OS << '}'; +} + +namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) { + T.writeToStream(OS); + return OS; + } + + raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T) { + T.writeToStream(OS); + return OS; + } + + raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) { + T.writeToStream(OS); + return OS; + } +} diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h new file mode 100644 index 000000000000..b2e217498888 --- /dev/null +++ b/utils/TableGen/InfoByHwMode.h @@ -0,0 +1,182 @@ +//===--- InfoByHwMode.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Classes that implement data parameterized by HW modes for instruction +// selection. Currently it is ValueTypeByHwMode (parameterized ValueType), +// and RegSizeInfoByHwMode (parameterized register/spill size and alignment +// data). 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H +#define LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H + +#include "CodeGenHwModes.h" +#include "llvm/CodeGen/MachineValueType.h" + +#include <map> +#include <set> +#include <string> +#include <vector> + +namespace llvm { + +struct CodeGenHwModes; +class Record; +class raw_ostream; + +template <typename InfoT> struct InfoByHwMode; + +std::string getModeName(unsigned Mode); + +enum : unsigned { + DefaultMode = CodeGenHwModes::DefaultMode, +}; + +template <typename InfoT> +std::vector<unsigned> union_modes(const InfoByHwMode<InfoT> &A, + const InfoByHwMode<InfoT> &B) { + std::vector<unsigned> V; + std::set<unsigned> U; + for (const auto &P : A) + U.insert(P.first); + for (const auto &P : B) + U.insert(P.first); + // Make sure that the default mode is last on the list. + bool HasDefault = U.count(DefaultMode); + for (unsigned M : U) + if (M != DefaultMode) + V.push_back(M); + if (HasDefault) + V.push_back(DefaultMode); + return V; +} + +template <typename InfoT> +struct InfoByHwMode { + typedef std::map<unsigned,InfoT> MapType; + typedef typename MapType::value_type PairType; + typedef typename MapType::iterator iterator; + typedef typename MapType::const_iterator const_iterator; + + InfoByHwMode() = default; + InfoByHwMode(const MapType &M) : Map(M) {} + + LLVM_ATTRIBUTE_ALWAYS_INLINE + iterator begin() { return Map.begin(); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + iterator end() { return Map.end(); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator begin() const { return Map.begin(); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + const_iterator end() const { return Map.end(); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool empty() const { return Map.empty(); } + + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool hasMode(unsigned M) const { return Map.find(M) != Map.end(); } + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool hasDefault() const { return hasMode(DefaultMode); } + + InfoT &get(unsigned Mode) { + if (!hasMode(Mode)) { + assert(hasMode(DefaultMode)); + Map.insert({Mode, Map.at(DefaultMode)}); + } + return Map.at(Mode); + } + const InfoT &get(unsigned Mode) const { + auto F = Map.find(Mode); + if (Mode != DefaultMode && F == Map.end()) + F = Map.find(DefaultMode); + assert(F != Map.end()); + return F->second; + } + + LLVM_ATTRIBUTE_ALWAYS_INLINE + bool isSimple() const { + return Map.size() == 1 && Map.begin()->first == DefaultMode; + } + LLVM_ATTRIBUTE_ALWAYS_INLINE + InfoT getSimple() const { + assert(isSimple()); + return Map.begin()->second; + } + void makeSimple(unsigned Mode) { + assert(hasMode(Mode) || hasDefault()); + InfoT I = get(Mode); + Map.clear(); + Map.insert(std::make_pair(DefaultMode, I)); + } + + MapType Map; +}; + +struct ValueTypeByHwMode : public InfoByHwMode<MVT> { + ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH); + ValueTypeByHwMode(MVT T) { Map.insert({DefaultMode,T}); } + ValueTypeByHwMode() = default; + + bool operator== (const ValueTypeByHwMode &T) const; + bool operator< (const ValueTypeByHwMode &T) const; + + bool isValid() const { + return !Map.empty(); + } + MVT getType(unsigned Mode) const { return get(Mode); } + MVT &getOrCreateTypeForMode(unsigned Mode, MVT Type); + + static StringRef getMVTName(MVT T); + void writeToStream(raw_ostream &OS) const; + void dump() const; +}; + +ValueTypeByHwMode getValueTypeByHwMode(Record *Rec, + const CodeGenHwModes &CGH); + +struct RegSizeInfo { + unsigned RegSize; + unsigned SpillSize; + unsigned SpillAlignment; + + RegSizeInfo(Record *R, 
const CodeGenHwModes &CGH); + RegSizeInfo() = default; + bool operator< (const RegSizeInfo &I) const; + bool operator== (const RegSizeInfo &I) const { + return std::tie(RegSize, SpillSize, SpillAlignment) == + std::tie(I.RegSize, I.SpillSize, I.SpillAlignment); + } + bool operator!= (const RegSizeInfo &I) const { + return !(*this == I); + } + + bool isSubClassOf(const RegSizeInfo &I) const; + void writeToStream(raw_ostream &OS) const; +}; + +struct RegSizeInfoByHwMode : public InfoByHwMode<RegSizeInfo> { + RegSizeInfoByHwMode(Record *R, const CodeGenHwModes &CGH); + RegSizeInfoByHwMode() = default; + bool operator< (const RegSizeInfoByHwMode &VI) const; + bool operator== (const RegSizeInfoByHwMode &VI) const; + bool operator!= (const RegSizeInfoByHwMode &VI) const { + return !(*this == VI); + } + + bool isSubClassOf(const RegSizeInfoByHwMode &I) const; + bool hasStricterSpillThan(const RegSizeInfoByHwMode &I) const; + + void writeToStream(raw_ostream &OS) const; +}; + +raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T); +raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T); +raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T); + +} // namespace llvm + +#endif // LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp new file mode 100644 index 000000000000..fa9ee9569427 --- /dev/null +++ b/utils/TableGen/InstrDocsEmitter.cpp @@ -0,0 +1,232 @@ +//===- InstrDocsEmitter.cpp - Opcode Documentation Generator --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InstrDocsEmitter generates restructured text documentation for the opcodes +// that can be used by MachineInstr. For each opcode, the documentation lists: +// * Opcode name +// * Assembly string +// * Flags (e.g. mayLoad, isBranch, ...) +// * Operands, including type and name +// * Operand constraints +// * Implicit register uses & defs +// * Predicates +// +//===----------------------------------------------------------------------===// + +#include "CodeGenDAGPatterns.h" +#include "CodeGenInstruction.h" +#include "CodeGenTarget.h" +#include "TableGenBackends.h" +#include "llvm/TableGen/Record.h" +#include <string> +#include <vector> + +using namespace llvm; + +namespace llvm { + +void writeTitle(StringRef Str, raw_ostream &OS, char Kind = '-') { + OS << std::string(Str.size(), Kind) << "\n" << Str << "\n" + << std::string(Str.size(), Kind) << "\n"; +} + +void writeHeader(StringRef Str, raw_ostream &OS, char Kind = '-') { + OS << Str << "\n" << std::string(Str.size(), Kind) << "\n"; +} + +std::string escapeForRST(StringRef Str) { + std::string Result; + Result.reserve(Str.size() + 4); + for (char C : Str) { + switch (C) { + // We want special characters to be shown as their C escape codes. + case '\n': Result += "\\n"; break; + case '\t': Result += "\\t"; break; + // Underscore at the end of a line has a special meaning in rst. + case '_': Result += "\\_"; break; + default: Result += C; + } + } + return Result; +} + +void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { + CodeGenDAGPatterns CDP(RK); + CodeGenTarget &Target = CDP.getTargetInfo(); + unsigned VariantCount = Target.getAsmParserVariantCount(); + + // Page title. 
+ std::string Title = Target.getName(); + Title += " Instructions"; + writeTitle(Title, OS); + OS << "\n"; + + for (const CodeGenInstruction *II : Target.getInstructionsByEnumValue()) { + Record *Inst = II->TheDef; + + // Don't print the target-independent instructions. + if (II->Namespace == "TargetOpcode") + continue; + + // Heading (instruction name). + writeHeader(escapeForRST(Inst->getName()), OS, '='); + OS << "\n"; + + // Assembly string(s). + if (!II->AsmString.empty()) { + for (unsigned VarNum = 0; VarNum < VariantCount; ++VarNum) { + Record *AsmVariant = Target.getAsmParserVariant(VarNum); + OS << "Assembly string"; + if (VariantCount != 1) + OS << " (" << AsmVariant->getValueAsString("Name") << ")"; + std::string AsmString = + CodeGenInstruction::FlattenAsmStringVariants(II->AsmString, VarNum); + // We trim spaces at each end of the asm string because rst needs the + // formatting backticks to be next to a non-whitespace character. + OS << ": ``" << escapeForRST(StringRef(AsmString).trim(" ")) + << "``\n\n"; + } + } + + // Boolean flags. + std::vector<const char *> FlagStrings; +#define xstr(s) str(s) +#define str(s) #s +#define FLAG(f) if (II->f) { FlagStrings.push_back(str(f)); } + FLAG(isReturn) + FLAG(isBranch) + FLAG(isIndirectBranch) + FLAG(isCompare) + FLAG(isMoveImm) + FLAG(isBitcast) + FLAG(isSelect) + FLAG(isBarrier) + FLAG(isCall) + FLAG(isAdd) + FLAG(canFoldAsLoad) + FLAG(mayLoad) + //FLAG(mayLoad_Unset) // Deliberately omitted. + FLAG(mayStore) + //FLAG(mayStore_Unset) // Deliberately omitted. + FLAG(isPredicable) + FLAG(isConvertibleToThreeAddress) + FLAG(isCommutable) + FLAG(isTerminator) + FLAG(isReMaterializable) + FLAG(hasDelaySlot) + FLAG(usesCustomInserter) + FLAG(hasPostISelHook) + FLAG(hasCtrlDep) + FLAG(isNotDuplicable) + FLAG(hasSideEffects) + //FLAG(hasSideEffects_Unset) // Deliberately omitted. + FLAG(isAsCheapAsAMove) + FLAG(hasExtraSrcRegAllocReq) + FLAG(hasExtraDefRegAllocReq) + FLAG(isCodeGenOnly) + FLAG(isPseudo) + FLAG(isRegSequence) + FLAG(isExtractSubreg) + FLAG(isInsertSubreg) + FLAG(isConvergent) + FLAG(hasNoSchedulingInfo) + if (!FlagStrings.empty()) { + OS << "Flags: "; + bool IsFirst = true; + for (auto FlagString : FlagStrings) { + if (!IsFirst) + OS << ", "; + OS << "``" << FlagString << "``"; + IsFirst = false; + } + OS << "\n\n"; + } + + // Operands. + for (unsigned i = 0; i < II->Operands.size(); ++i) { + bool IsDef = i < II->Operands.NumDefs; + auto Op = II->Operands[i]; + + if (Op.MINumOperands > 1) { + // This operand corresponds to multiple operands on the + // MachineInstruction, so print all of them, showing the types and + // names of both the compound operand and the basic operands it + // contains. + for (unsigned SubOpIdx = 0; SubOpIdx < Op.MINumOperands; ++SubOpIdx) { + Record *SubRec = + cast<DefInit>(Op.MIOperandInfo->getArg(SubOpIdx))->getDef(); + StringRef SubOpName = Op.MIOperandInfo->getArgNameStr(SubOpIdx); + StringRef SubOpTypeName = SubRec->getName(); + + OS << "* " << (IsDef ? "DEF" : "USE") << " ``" << Op.Rec->getName() + << "/" << SubOpTypeName << ":$" << Op.Name << "."; + // Not all sub-operands are named, make up a name for these. + if (SubOpName.empty()) + OS << "anon" << SubOpIdx; + else + OS << SubOpName; + OS << "``\n\n"; + } + } else { + // The operand corresponds to only one MachineInstruction operand. + OS << "* " << (IsDef ? "DEF" : "USE") << " ``" << Op.Rec->getName() + << ":$" << Op.Name << "``\n\n"; + } + } + + // Constraints. 
+ StringRef Constraints = Inst->getValueAsString("Constraints"); + if (!Constraints.empty()) { + OS << "Constraints: ``" << Constraints << "``\n\n"; + } + + // Implicit definitions. + if (!II->ImplicitDefs.empty()) { + OS << "Implicit defs: "; + bool IsFirst = true; + for (Record *Def : II->ImplicitDefs) { + if (!IsFirst) + OS << ", "; + OS << "``" << Def->getName() << "``"; + IsFirst = false; + } + OS << "\n\n"; + } + + // Implicit uses. + if (!II->ImplicitUses.empty()) { + OS << "Implicit uses: "; + bool IsFirst = true; + for (Record *Use : II->ImplicitUses) { + if (!IsFirst) + OS << ", "; + OS << "``" << Use->getName() << "``"; + IsFirst = false; + } + OS << "\n\n"; + } + + // Predicates. + std::vector<Record *> Predicates = + II->TheDef->getValueAsListOfDefs("Predicates"); + if (!Predicates.empty()) { + OS << "Predicates: "; + bool IsFirst = true; + for (Record *P : Predicates) { + if (!IsFirst) + OS << ", "; + OS << "``" << P->getName() << "``"; + IsFirst = false; + } + OS << "\n\n"; + } + } +} + +} // end llvm namespace diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index e270a17356f7..379e3245d066 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -588,6 +588,14 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << " " << Inst->TheDef->getName() << "\t= " << Num++ << ",\n"; OS << " INSTRUCTION_LIST_END = " << Num << "\n"; OS << " };\n\n"; + OS << "} // end " << Namespace << " namespace\n"; + OS << "} // end llvm namespace\n"; + OS << "#endif // GET_INSTRINFO_ENUM\n\n"; + + OS << "#ifdef GET_INSTRINFO_SCHED_ENUM\n"; + OS << "#undef GET_INSTRINFO_SCHED_ENUM\n"; + OS << "namespace llvm {\n\n"; + OS << "namespace " << Namespace << " {\n"; OS << "namespace Sched {\n"; OS << " enum {\n"; Num = 0; @@ -599,7 +607,7 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << "} // end " << Namespace << " namespace\n"; OS << "} // end llvm namespace\n"; - OS << "#endif // GET_INSTRINFO_ENUM\n\n"; + OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n"; } namespace llvm { diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index caa52d28f771..b4e61ec53c19 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -214,7 +214,10 @@ enum IIT_Info { IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, - IIT_V1024 = 37 + IIT_V1024 = 37, + IIT_STRUCT6 = 38, + IIT_STRUCT7 = 39, + IIT_STRUCT8 = 40 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -369,6 +372,9 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int, case 3: TypeSig.push_back(IIT_STRUCT3); break; case 4: TypeSig.push_back(IIT_STRUCT4); break; case 5: TypeSig.push_back(IIT_STRUCT5); break; + case 6: TypeSig.push_back(IIT_STRUCT6); break; + case 7: TypeSig.push_back(IIT_STRUCT7); break; + case 8: TypeSig.push_back(IIT_STRUCT8); break; default: llvm_unreachable("Unhandled case in struct"); } @@ -695,7 +701,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, if (addComma) OS << ","; OS << "Attribute::WriteOnly,"; - OS << "Attribute::InaccessibleMemOrArgOnly"; + OS << "Attribute::InaccessibleMemOrArgMemOnly"; break; case CodeGenIntrinsic::ReadWriteArgMem: if (addComma) diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp index e3777d036a23..0358cf26509b 100644 --- a/utils/TableGen/OptParserEmitter.cpp +++ b/utils/TableGen/OptParserEmitter.cpp @@ -298,5 +298,31 @@ void EmitOptParser(RecordKeeper 
&Records, raw_ostream &OS) { OS << ")\n"; } OS << "#endif // OPTION\n"; + + OS << "\n"; + OS << "#ifdef OPTTABLE_ARG_INIT\n"; + OS << "//////////\n"; + OS << "// Option Values\n\n"; + for (unsigned I = 0, E = Opts.size(); I != E; ++I) { + const Record &R = *Opts[I]; + if (isa<UnsetInit>(R.getValueInit("ValuesCode"))) + continue; + OS << "{\n"; + OS << "bool ValuesWereAdded;\n"; + OS << R.getValueAsString("ValuesCode"); + OS << "\n"; + for (const std::string &Pref : R.getValueAsListOfStrings("Prefixes")) { + OS << "ValuesWereAdded = Opt.addValues("; + std::string S = (Pref + R.getValueAsString("Name")).str(); + write_cstring(OS, S); + OS << ", Values);\n"; + OS << "(void)ValuesWereAdded;\n"; + OS << "assert(ValuesWereAdded && \"Couldn't add values to " + "OptTable!\");\n"; + } + OS << "}\n"; + } + OS << "\n"; + OS << "#endif // OPTTABLE_ARG_INIT\n"; } } // end namespace llvm diff --git a/utils/TableGen/RegisterBankEmitter.cpp b/utils/TableGen/RegisterBankEmitter.cpp index 880d075da427..5c6471688044 100644 --- a/utils/TableGen/RegisterBankEmitter.cpp +++ b/utils/TableGen/RegisterBankEmitter.cpp @@ -18,6 +18,7 @@ #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" +#include "CodeGenHwModes.h" #include "CodeGenRegisters.h" #define DEBUG_TYPE "register-bank-emitter" @@ -84,7 +85,8 @@ public: // the VT's reliably due to Untyped. if (RCWithLargestRegsSize == nullptr) RCWithLargestRegsSize = RC; - else if (RCWithLargestRegsSize->SpillSize < RC->SpillSize) + else if (RCWithLargestRegsSize->RSI.get(DefaultMode).SpillSize < + RC->RSI.get(DefaultMode).SpillSize) RCWithLargestRegsSize = RC; assert(RCWithLargestRegsSize && "RC was nullptr?"); @@ -115,7 +117,7 @@ private: public: RegisterBankEmitter(RecordKeeper &R) - : Records(R), RegisterClassHierarchy(Records) {} + : Records(R), RegisterClassHierarchy(Records, CodeGenHwModes(R)) {} void run(raw_ostream &OS); }; @@ -241,7 +243,8 @@ void RegisterBankEmitter::emitBaseClassImplementation( for (const auto &Bank : Banks) { std::string QualifiedBankID = (TargetName + "::" + Bank.getEnumeratorName()).str(); - unsigned Size = Bank.getRCWithLargestRegsSize()->SpillSize; + const CodeGenRegisterClass &RC = *Bank.getRCWithLargestRegsSize(); + unsigned Size = RC.RSI.get(DefaultMode).SpillSize; OS << "RegisterBank " << Bank.getInstanceVarName() << "(/* ID */ " << QualifiedBankID << ", /* Name */ \"" << Bank.getName() << "\", /* Size */ " << Size << ", " @@ -296,6 +299,19 @@ void RegisterBankEmitter::run(raw_ostream &OS) { Banks.push_back(Bank); } + // Warn about ambiguous MIR caused by register bank/class name clashes. 
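+  // For example (names hypothetical): a register bank defined as "GPR"
+  // alongside a register class named "GPR" (or "gpr") triggers this warning,
+  // because the comparison below is case-insensitive.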
+ for (const auto &Class : Records.getAllDerivedDefinitions("RegisterClass")) { + for (const auto &Bank : Banks) { + if (Bank.getName().lower() == Class->getName().lower()) { + PrintWarning(Bank.getDef().getLoc(), "Register bank names should be " + "distinct from register classes " + "to avoid ambiguous MIR"); + PrintNote(Bank.getDef().getLoc(), "RegisterBank was declared here"); + PrintNote(Class->getLoc(), "RegisterClass was declared here"); + } + } + } + emitSourceFileHeader("Register Bank Source Fragments", OS); OS << "#ifdef GET_REGBANK_DECLARATIONS\n" << "#undef GET_REGBANK_DECLARATIONS\n"; diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index bebb1a183fc7..7eef2337c140 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" @@ -44,13 +45,24 @@ using namespace llvm; +cl::OptionCategory RegisterInfoCat("Options for -gen-register-info"); + +static cl::opt<bool> + RegisterInfoDebug("register-info-debug", cl::init(false), + cl::desc("Dump register information to help debugging"), + cl::cat(RegisterInfoCat)); + namespace { class RegisterInfoEmitter { + CodeGenTarget Target; RecordKeeper &Records; public: - RegisterInfoEmitter(RecordKeeper &R) : Records(R) {} + RegisterInfoEmitter(RecordKeeper &R) : Target(R), Records(R) { + CodeGenRegBank &RegBank = Target.getRegBank(); + RegBank.computeDerivedInfo(); + } // runEnums - Print out enum values for all of the registers. void runEnums(raw_ostream &o, CodeGenTarget &Target, CodeGenRegBank &Bank); @@ -69,6 +81,8 @@ public: // run - Output the register file description. void run(raw_ostream &o); + void debugDump(raw_ostream &OS); + private: void EmitRegMapping(raw_ostream &o, const std::deque<CodeGenRegister> &Regs, bool isCtor); @@ -854,8 +868,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // Compute the corresponding sub-register indexes. SubRegIdxVec &SRIs = SubRegIdxLists[i]; - for (unsigned j = 0, je = SR.size(); j != je; ++j) - SRIs.push_back(Reg.getSubRegIndex(SR[j])); + for (const CodeGenRegister *S : SR) + SRIs.push_back(Reg.getSubRegIndex(S)); SubRegIdxSeqs.add(SRIs); // Super-registers are already computed. @@ -993,8 +1007,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, OS << " // " << Name << " Register Class...\n" << " const MCPhysReg " << Name << "[] = {\n "; - for (unsigned i = 0, e = Order.size(); i != e; ++i) { - Record *Reg = Order[i]; + for (Record *Reg : Order) { OS << getQualifiedName(Reg) << ", "; } OS << "\n };\n\n"; @@ -1003,8 +1016,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, << " const uint8_t " << Name << "Bits[] = {\n "; BitVectorEmitter BVE; - for (unsigned i = 0, e = Order.size(); i != e; ++i) { - Record *Reg = Order[i]; + for (Record *Reg : Order) { BVE.add(Target.getRegBank().getReg(Reg)->EnumValue); } BVE.print(OS); @@ -1023,13 +1035,14 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, for (const auto &RC : RegisterClasses) { assert(isInt<8>(RC.CopyCost) && "Copy cost too large."); - // Register size and spill size will become independent, but are not at - // the moment. For now use SpillSize as the register size. 
+ uint32_t RegSize = 0; + if (RC.RSI.isSimple()) + RegSize = RC.RSI.getSimple().RegSize; OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, " << RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), " << RC.getQualifiedName() + "RegClassID" << ", " - << RC.SpillSize/8 << ", " + << RegSize/8 << ", " << RC.CopyCost << ", " << ( RC.Allocatable ? "true" : "false" ) << " },\n"; } @@ -1089,7 +1102,7 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target, const std::string &TargetName = Target.getName(); std::string ClassName = TargetName + "GenRegisterInfo"; - OS << "#include \"llvm/Target/TargetRegisterInfo.h\"\n\n"; + OS << "#include \"llvm/CodeGen/TargetRegisterInfo.h\"\n\n"; OS << "namespace llvm {\n\n"; @@ -1097,7 +1110,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target, OS << "struct " << ClassName << " : public TargetRegisterInfo {\n" << " explicit " << ClassName - << "(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);\n"; + << "(unsigned RA, unsigned D = 0, unsigned E = 0,\n" + << " unsigned PC = 0, unsigned HwMode = 0);\n"; if (!RegBank.getSubRegIndices().empty()) { OS << " unsigned composeSubRegIndicesImpl" << "(unsigned, unsigned) const override;\n" @@ -1176,10 +1190,19 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, AllocatableRegs.insert(Order.begin(), Order.end()); } + const CodeGenHwModes &CGH = Target.getHwModes(); + unsigned NumModes = CGH.getNumModeIds(); + // Build a shared array of value types. - SequenceToOffsetTable<SmallVector<MVT::SimpleValueType, 4> > VTSeqs; - for (const auto &RC : RegisterClasses) - VTSeqs.add(RC.VTs); + SequenceToOffsetTable<std::vector<MVT::SimpleValueType>> VTSeqs; + for (unsigned M = 0; M < NumModes; ++M) { + for (const auto &RC : RegisterClasses) { + std::vector<MVT::SimpleValueType> S; + for (const ValueTypeByHwMode &VVT : RC.VTs) + S.push_back(VVT.get(M).SimpleTy); + VTSeqs.add(S); + } + } VTSeqs.layout(); OS << "\nstatic const MVT::SimpleValueType VTLists[] = {\n"; VTSeqs.emit(OS, printSimpleValueType, "MVT::Other"); @@ -1207,6 +1230,32 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, // Now that all of the structs have been emitted, emit the instances. 
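  // Roughly, the table emitted below has one block of rows per hardware mode
  // and one row per register class (class names and sizes here are
  // hypothetical):
  //   static const TargetRegisterInfo::RegClassInfo RegClassInfos[] = {
  //     // Mode = 0 (Default)
  //     { 32, 32, 32, VTLists+0 }, // GPR32
  //     { 64, 64, 64, VTLists+5 }, // GPR64
  //   };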
if (!RegisterClasses.empty()) { + OS << "\nstatic const TargetRegisterInfo::RegClassInfo RegClassInfos[]" + << " = {\n"; + for (unsigned M = 0; M < NumModes; ++M) { + unsigned EV = 0; + OS << " // Mode = " << M << " ("; + if (M == 0) + OS << "Default"; + else + OS << CGH.getMode(M).Name; + OS << ")\n"; + for (const auto &RC : RegisterClasses) { + assert(RC.EnumValue == EV++ && "Unexpected order of register classes"); + (void)EV; + const RegSizeInfo &RI = RC.RSI.get(M); + OS << " { " << RI.RegSize << ", " << RI.SpillSize << ", " + << RI.SpillAlignment; + std::vector<MVT::SimpleValueType> VTs; + for (const ValueTypeByHwMode &VVT : RC.VTs) + VTs.push_back(VVT.get(M).SimpleTy); + OS << ", VTLists+" << VTSeqs.get(VTs) << " }, // " + << RC.getName() << '\n'; + } + } + OS << "};\n"; + + OS << "\nstatic const TargetRegisterClass *const " << "NullRegClasses[] = { nullptr };\n\n"; @@ -1313,15 +1362,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, << " { // Register class instances\n"; for (const auto &RC : RegisterClasses) { - assert(isUInt<16>(RC.SpillSize/8) && "SpillSize too large."); - assert(isUInt<16>(RC.SpillAlignment/8) && "SpillAlignment too large."); OS << " extern const TargetRegisterClass " << RC.getName() << "RegClass = {\n " << '&' << Target.getName() << "MCRegisterClasses[" << RC.getName() << "RegClassID],\n " - << RC.SpillSize/8 << ", /* SpillSize */\n " - << RC.SpillAlignment/8 << ", /* SpillAlignment */\n " - << "VTLists + " << VTSeqs.get(RC.VTs) << ",\n " << RC.getName() - << "SubClassMask,\n SuperRegIdxSeqs + " + << RC.getName() << "SubClassMask,\n SuperRegIdxSeqs + " << SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n "; printMask(OS, RC.LaneMask); OS << ",\n " << (unsigned)RC.AllocationPriority << ",\n " @@ -1425,12 +1469,14 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, EmitRegMappingTables(OS, Regs, true); OS << ClassName << "::\n" << ClassName - << "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour, unsigned PC)\n" + << "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour,\n" + " unsigned PC, unsigned HwMode)\n" << " : TargetRegisterInfo(" << TargetName << "RegInfoDesc" - << ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n" - << " SubRegIndexNameTable, SubRegIndexLaneMaskTable, "; + << ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() << ",\n" + << " SubRegIndexNameTable, SubRegIndexLaneMaskTable,\n" + << " "; printMask(OS, RegBank.CoveringLanes); - OS << ") {\n" + OS << ", RegClassInfos, HwMode) {\n" << " InitMCRegisterInfo(" << TargetName << "RegDesc, " << Regs.size() + 1 << ", RA, PC,\n " << TargetName << "MCRegisterClasses, " << RegisterClasses.size() << ",\n" @@ -1521,14 +1567,74 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, } void RegisterInfoEmitter::run(raw_ostream &OS) { - CodeGenTarget Target(Records); CodeGenRegBank &RegBank = Target.getRegBank(); - RegBank.computeDerivedInfo(); - runEnums(OS, Target, RegBank); runMCDesc(OS, Target, RegBank); runTargetHeader(OS, Target, RegBank); runTargetDesc(OS, Target, RegBank); + + if (RegisterInfoDebug) + debugDump(errs()); +} + +void RegisterInfoEmitter::debugDump(raw_ostream &OS) { + CodeGenRegBank &RegBank = Target.getRegBank(); + const CodeGenHwModes &CGH = Target.getHwModes(); + unsigned NumModes = CGH.getNumModeIds(); + auto getModeName = [CGH] (unsigned M) -> StringRef { + if (M == 0) + return "Default"; + return CGH.getMode(M).Name; + }; + + for (const 
CodeGenRegisterClass &RC : RegBank.getRegClasses()) { + OS << "RegisterClass " << RC.getName() << ":\n"; + OS << "\tSpillSize: {"; + for (unsigned M = 0; M != NumModes; ++M) + OS << ' ' << getModeName(M) << ':' << RC.RSI.get(M).SpillSize; + OS << " }\n\tSpillAlignment: {"; + for (unsigned M = 0; M != NumModes; ++M) + OS << ' ' << getModeName(M) << ':' << RC.RSI.get(M).SpillAlignment; + OS << " }\n\tNumRegs: " << RC.getMembers().size() << '\n'; + OS << "\tLaneMask: " << PrintLaneMask(RC.LaneMask) << '\n'; + OS << "\tHasDisjunctSubRegs: " << RC.HasDisjunctSubRegs << '\n'; + OS << "\tCoveredBySubRegs: " << RC.CoveredBySubRegs << '\n'; + OS << "\tRegs:"; + for (const CodeGenRegister *R : RC.getMembers()) { + OS << " " << R->getName(); + } + OS << '\n'; + OS << "\tSubClasses:"; + const BitVector &SubClasses = RC.getSubClasses(); + for (const CodeGenRegisterClass &SRC : RegBank.getRegClasses()) { + if (!SubClasses.test(SRC.EnumValue)) + continue; + OS << " " << SRC.getName(); + } + OS << '\n'; + OS << "\tSuperClasses:"; + for (const CodeGenRegisterClass *SRC : RC.getSuperClasses()) { + OS << " " << SRC->getName(); + } + OS << '\n'; + } + + for (const CodeGenSubRegIndex &SRI : RegBank.getSubRegIndices()) { + OS << "SubRegIndex " << SRI.getName() << ":\n"; + OS << "\tLaneMask: " << PrintLaneMask(SRI.LaneMask) << '\n'; + OS << "\tAllSuperRegsCovered: " << SRI.AllSuperRegsCovered << '\n'; + } + + for (const CodeGenRegister &R : RegBank.getRegisters()) { + OS << "Register " << R.getName() << ":\n"; + OS << "\tCostPerUse: " << R.CostPerUse << '\n'; + OS << "\tCoveredBySubregs: " << R.CoveredBySubRegs << '\n'; + OS << "\tHasDisjunctSubRegs: " << R.HasDisjunctSubRegs << '\n'; + for (std::pair<CodeGenSubRegIndex*,CodeGenRegister*> P : R.getSubRegs()) { + OS << "\tSubReg " << P.first->getName() + << " = " << P.second->getName() << '\n'; + } + } } namespace llvm { diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp index f73c197dee5a..63252e8c0391 100644 --- a/utils/TableGen/SearchableTableEmitter.cpp +++ b/utils/TableGen/SearchableTableEmitter.cpp @@ -20,7 +20,6 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include <algorithm> -#include <sstream> #include <string> #include <vector> using namespace llvm; diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h index e026b1c9fbf0..2b8f66a3bf3e 100644 --- a/utils/TableGen/SequenceToOffsetTable.h +++ b/utils/TableGen/SequenceToOffsetTable.h @@ -37,7 +37,7 @@ class SequenceToOffsetTable { // Define a comparator for SeqT that sorts a suffix immediately before a // sequence with that suffix. 
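  // For example, with this ordering the sequence {1, 2} compares less than
  // {0, 1, 2}: the comparison is done on the reversed sequences, so a suffix
  // is placed directly in front of any sequence that ends with it.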
- struct SeqLess : public std::binary_function<SeqT, SeqT, bool> { + struct SeqLess { Less L; bool operator()(const SeqT &A, const SeqT &B) const { return std::lexicographical_compare(A.rbegin(), A.rend(), diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index d1d873b66aaa..2c5658f8ce75 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -68,6 +68,7 @@ class SubtargetEmitter { } }; + const CodeGenTarget &TGT; RecordKeeper &Records; CodeGenSchedModels &SchedModels; std::string Target; @@ -106,12 +107,14 @@ class SubtargetEmitter { void EmitProcessorLookup(raw_ostream &OS); void EmitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS); void EmitSchedModel(raw_ostream &OS); + void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS); void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures, unsigned NumProcs); public: - SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT): - Records(R), SchedModels(TGT.getSchedModels()), Target(TGT.getName()) {} + SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT) + : TGT(TGT), Records(R), SchedModels(TGT.getSchedModels()), + Target(TGT.getName()) {} void run(raw_ostream &o); }; @@ -139,15 +142,12 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) { OS << "enum {\n"; // For each record - for (unsigned i = 0; i < N;) { + for (unsigned i = 0; i < N; ++i) { // Next record Record *Def = DefList[i]; // Get and emit name - OS << " " << Def->getName() << " = " << i; - if (++i < N) OS << ","; - - OS << "\n"; + OS << " " << Def->getName() << " = " << i << ",\n"; } // Close enumeration and namespace @@ -200,15 +200,8 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) { OS << " " << Target << "::" << ImpliesList[j]->getName(); if (++j < M) OS << ","; } - OS << " }"; - - OS << " }"; + OS << " } },\n"; ++NumFeatures; - - // Depending on 'if more in the list' emit comma - if ((i + 1) < N) OS << ","; - - OS << "\n"; } // End feature table @@ -233,10 +226,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) { << "SubTypeKV[] = {\n"; // For each processor - for (unsigned i = 0, N = ProcessorList.size(); i < N;) { - // Next processor - Record *Processor = ProcessorList[i]; - + for (Record *Processor : ProcessorList) { StringRef Name = Processor->getValueAsString("Name"); const std::vector<Record*> &FeatureList = Processor->getValueAsListOfDefs("Features"); @@ -251,15 +241,8 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) { OS << " " << Target << "::" << FeatureList[j]->getName(); if (++j < M) OS << ","; } - OS << " }"; - // The { } is for the "implies" section of this data structure. - OS << ", { } }"; - - // Depending on 'if more in the list' emit comma - if (++i < N) OS << ","; - - OS << "\n"; + OS << " }, { } },\n"; } // End processor table @@ -597,12 +580,10 @@ void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R, void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, raw_ostream &OS) { - char Sep = ProcModel.ProcResourceDefs.empty() ? 
' ' : ','; - OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered}\n"; OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName << "ProcResources" << "[] = {\n" - << " {DBGFIELD(\"InvalidUnit\") 0, 0, 0}" << Sep << "\n"; + << " {DBGFIELD(\"InvalidUnit\") 0, 0, 0},\n"; for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) { Record *PRDef = ProcModel.ProcResourceDefs[i]; @@ -620,20 +601,19 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, else { // Find the SuperIdx if (PRDef->getValueInit("Super")->isComplete()) { - SuperDef = SchedModels.findProcResUnits( - PRDef->getValueAsDef("Super"), ProcModel); + SuperDef = + SchedModels.findProcResUnits(PRDef->getValueAsDef("Super"), + ProcModel, PRDef->getLoc()); SuperIdx = ProcModel.getProcResourceIdx(SuperDef); } NumUnits = PRDef->getValueAsInt("NumUnits"); } // Emit the ProcResourceDesc - if (i+1 == e) - Sep = ' '; OS << " {DBGFIELD(\"" << PRDef->getName() << "\") "; if (PRDef->getName().size() < 15) OS.indent(15 - PRDef->getName().size()); OS << NumUnits << ", " << SuperIdx << ", " - << BufferSize << "}" << Sep << " // #" << i+1; + << BufferSize << "}, // #" << i+1; if (SuperDef) OS << ", Super=" << SuperDef->getName(); OS << "\n"; @@ -688,8 +668,8 @@ Record *SubtargetEmitter::FindWriteResources( // then call FindWriteResources recursively with that model here. if (!ResDef) { PrintFatalError(ProcModel.ModelDef->getLoc(), - std::string("Processor does not define resources for ") - + SchedWrite.TheDef->getName()); + Twine("Processor does not define resources for ") + + SchedWrite.TheDef->getName()); } return ResDef; } @@ -740,8 +720,8 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead, // then call FindReadAdvance recursively with that model here. if (!ResDef && SchedRead.TheDef->getName() != "ReadDefault") { PrintFatalError(ProcModel.ModelDef->getLoc(), - std::string("Processor does not define resources for ") - + SchedRead.TheDef->getName()); + Twine("Processor does not define resources for ") + + SchedRead.TheDef->getName()); } return ResDef; } @@ -760,7 +740,7 @@ void SubtargetEmitter::ExpandProcResources(RecVec &PRVec, SubResources = PRDef->getValueAsListOfDefs("Resources"); else { SubResources.push_back(PRDef); - PRDef = SchedModels.findProcResUnits(PRVec[i], PM); + PRDef = SchedModels.findProcResUnits(PRDef, PM, PRDef->getLoc()); for (Record *SubDef = PRDef; SubDef->getValueInit("Super")->isComplete();) { if (SubDef->isSubClassOf("ProcResGroup")) { @@ -769,7 +749,8 @@ void SubtargetEmitter::ExpandProcResources(RecVec &PRVec, " cannot be a super resources."); } Record *SuperDef = - SchedModels.findProcResUnits(SubDef->getValueAsDef("Super"), PM); + SchedModels.findProcResUnits(SubDef->getValueAsDef("Super"), PM, + SubDef->getLoc()); PRVec.push_back(SuperDef); Cycles.push_back(Cycles[i]); SubDef = SuperDef; @@ -818,14 +799,10 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, // A Variant SchedClass has no resources of its own. 
bool HasVariants = false; - for (std::vector<CodeGenSchedTransition>::const_iterator - TI = SC.Transitions.begin(), TE = SC.Transitions.end(); - TI != TE; ++TI) { - if (TI->ProcIndices[0] == 0) { - HasVariants = true; - break; - } - if (is_contained(TI->ProcIndices, ProcModel.Index)) { + for (const CodeGenSchedTransition &CGT : + make_range(SC.Transitions.begin(), SC.Transitions.end())) { + if (CGT.ProcIndices[0] == 0 || + is_contained(CGT.ProcIndices, ProcModel.Index)) { HasVariants = true; break; } @@ -1132,10 +1109,8 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables, << ", " << format("%2d", MCDesc.WriteLatencyIdx) << ", " << MCDesc.NumWriteLatencyEntries << ", " << format("%2d", MCDesc.ReadAdvanceIdx) - << ", " << MCDesc.NumReadAdvanceEntries << "}"; - if (SCIdx + 1 < SCEnd) - OS << ','; - OS << " // #" << SCIdx << '\n'; + << ", " << MCDesc.NumReadAdvanceEntries + << "}, // #" << SCIdx << '\n'; } OS << "}; // " << PI->ModelName << "SchedClasses\n"; } @@ -1184,9 +1159,10 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { OS << " nullptr, nullptr, 0, 0," << " // No instruction-level machine model.\n"; if (PM.hasItineraries()) - OS << " " << PM.ItinsDef->getName() << "};\n"; + OS << " " << PM.ItinsDef->getName() << "\n"; else - OS << " nullptr}; // No Itinerary\n"; + OS << " nullptr // No Itinerary\n"; + OS << "};\n"; } } @@ -1206,21 +1182,13 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) { << Target << "ProcSchedKV[] = {\n"; // For each processor - for (unsigned i = 0, N = ProcessorList.size(); i < N;) { - // Next processor - Record *Processor = ProcessorList[i]; - + for (Record *Processor : ProcessorList) { StringRef Name = Processor->getValueAsString("Name"); const std::string &ProcModelName = SchedModels.getModelForProc(Processor).ModelName; // Emit as { "cpu", procinit }, - OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " }"; - - // Depending on ''if more in the list'' emit comma - if (++i < N) OS << ","; - - OS << "\n"; + OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n"; } // End processor table @@ -1234,7 +1202,7 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) { OS << "#ifdef DBGFIELD\n" << "#error \"<target>GenSubtargetInfo.inc requires a DBGFIELD macro\"\n" << "#endif\n" - << "#ifndef NDEBUG\n" + << "#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)\n" << "#define DBGFIELD(x) x,\n" << "#else\n" << "#define DBGFIELD(x)\n" @@ -1260,7 +1228,7 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) { // Emit the processor lookup data EmitProcessorLookup(OS); - OS << "#undef DBGFIELD"; + OS << "\n#undef DBGFIELD"; } void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, @@ -1329,6 +1297,22 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, << "} // " << ClassName << "::resolveSchedClass\n"; } +void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, + raw_ostream &OS) { + const CodeGenHwModes &CGH = TGT.getHwModes(); + assert(CGH.getNumModeIds() > 0); + if (CGH.getNumModeIds() == 1) + return; + + OS << "unsigned " << ClassName << "::getHwMode() const {\n"; + for (unsigned M = 1, NumModes = CGH.getNumModeIds(); M != NumModes; ++M) { + const HwMode &HM = CGH.getMode(M); + OS << " if (checkFeatures(\"" << HM.Features + << "\")) return " << M << ";\n"; + } + OS << " return 0;\n}\n"; +} + // // ParseFeaturesFunction - Produces a subtarget specific function for parsing // the subtarget features string. 
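// For reference, EmitHwModeCheck above generates a helper along these lines
// (subtarget class and feature names are hypothetical):
//   unsigned MyTargetGenSubtargetInfo::getHwMode() const {
//     if (checkFeatures("+feature-that-selects-mode-1")) return 1;
//     return 0;
//   }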
@@ -1408,7 +1392,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { #endif // MCInstrInfo initialization routine. - OS << "static inline MCSubtargetInfo *create" << Target + OS << "\nstatic inline MCSubtargetInfo *create" << Target << "MCSubtargetInfoImpl(" << "const Triple &TT, StringRef CPU, StringRef FS) {\n"; OS << " return new MCSubtargetInfo(TT, CPU, FS, "; @@ -1462,9 +1446,11 @@ void SubtargetEmitter::run(raw_ostream &OS) { << " const MachineInstr *DefMI," << " const TargetSchedModel *SchedModel) const override;\n" << " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)" - << " const;\n" - << "};\n"; - OS << "} // end namespace llvm\n\n"; + << " const;\n"; + if (TGT.getHwModes().getNumModeIds() > 1) + OS << " unsigned getHwMode() const override;\n"; + OS << "};\n" + << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_HEADER\n\n"; @@ -1515,6 +1501,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << ") {}\n\n"; EmitSchedModelHelpers(ClassName, OS); + EmitHwModeCheck(ClassName, OS); OS << "} // end namespace llvm\n\n"; diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 00d20f1df6c2..b0e0385a45c7 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -16,7 +16,6 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" -#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" @@ -28,6 +27,7 @@ enum ActionType { GenEmitter, GenRegisterInfo, GenInstrInfo, + GenInstrDocs, GenAsmWriter, GenAsmMatcher, GenDisassembler, @@ -47,6 +47,7 @@ enum ActionType { GenSearchableTables, GenGlobalISel, GenX86EVEX2VEXTables, + GenX86FoldTables, GenRegisterBank, }; @@ -61,6 +62,8 @@ namespace { "Generate registers and register classes info"), clEnumValN(GenInstrInfo, "gen-instr-info", "Generate instruction descriptions"), + clEnumValN(GenInstrDocs, "gen-instr-docs", + "Generate instruction documentation"), clEnumValN(GenCallingConv, "gen-callingconv", "Generate calling convention descriptions"), clEnumValN(GenAsmWriter, "gen-asm-writer", @@ -99,6 +102,8 @@ namespace { "Generate GlobalISel selector"), clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables"), + clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", + "Generate X86 fold tables"), clEnumValN(GenRegisterBank, "gen-register-bank", "Generate registers bank descriptions"))); @@ -121,6 +126,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenInstrInfo: EmitInstrInfo(Records, OS); break; + case GenInstrDocs: + EmitInstrDocs(Records, OS); + break; case GenCallingConv: EmitCallingConv(Records, OS); break; @@ -196,6 +204,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenX86EVEX2VEXTables: EmitX86EVEX2VEXTables(Records, OS); break; + case GenX86FoldTables: + EmitX86FoldTables(Records, OS); + break; } return false; @@ -217,6 +228,6 @@ int main(int argc, char **argv) { #include <sanitizer/lsan_interface.h> // Disable LeakSanitizer for this binary as it has too many leaks that are not // very interesting to fix. See compiler-rt/include/sanitizer/lsan_interface.h . 
-int __lsan_is_turned_off() { return 1; } +LLVM_ATTRIBUTE_USED int __lsan_is_turned_off() { return 1; } #endif // __has_feature(address_sanitizer) #endif // defined(__has_feature) diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 2512997e27f9..914cd5a1fc9b 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -72,6 +72,7 @@ void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); void EmitFastISel(RecordKeeper &RK, raw_ostream &OS); void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS); +void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS); void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS); void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS); @@ -82,6 +83,7 @@ void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); +void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index c80b96905b30..fce41f7a2cc2 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -74,33 +74,34 @@ static inline const char* stringForOperandEncoding(OperandEncoding encoding) { /// @param parent - The class that may be the superset /// @return - True if child is a subset of parent, false otherwise. static inline bool inheritsFrom(InstructionContext child, - InstructionContext parent, - bool VEX_LIG = false, bool AdSize64 = false) { + InstructionContext parent, bool noPrefix = true, + bool VEX_LIG = false, bool VEX_WIG = false, + bool AdSize64 = false) { if (child == parent) return true; switch (parent) { case IC: return(inheritsFrom(child, IC_64BIT, AdSize64) || - inheritsFrom(child, IC_OPSIZE) || + (noPrefix && inheritsFrom(child, IC_OPSIZE, noPrefix)) || inheritsFrom(child, IC_ADSIZE) || - inheritsFrom(child, IC_XD) || - inheritsFrom(child, IC_XS)); + (noPrefix && inheritsFrom(child, IC_XD, noPrefix)) || + (noPrefix && inheritsFrom(child, IC_XS, noPrefix))); case IC_64BIT: return(inheritsFrom(child, IC_64BIT_REXW) || - inheritsFrom(child, IC_64BIT_OPSIZE) || + (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE, noPrefix)) || (!AdSize64 && inheritsFrom(child, IC_64BIT_ADSIZE)) || - inheritsFrom(child, IC_64BIT_XD) || - inheritsFrom(child, IC_64BIT_XS)); + (noPrefix && inheritsFrom(child, IC_64BIT_XD, noPrefix)) || + (noPrefix && inheritsFrom(child, IC_64BIT_XS, noPrefix))); case IC_OPSIZE: return inheritsFrom(child, IC_64BIT_OPSIZE) || inheritsFrom(child, IC_OPSIZE_ADSIZE); case IC_ADSIZE: - return inheritsFrom(child, IC_OPSIZE_ADSIZE); + return (noPrefix && inheritsFrom(child, IC_OPSIZE_ADSIZE, noPrefix)); case IC_OPSIZE_ADSIZE: return false; case IC_64BIT_ADSIZE: - return inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE); + return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix)); case IC_64BIT_OPSIZE_ADSIZE: return false; case IC_XD: @@ -112,9 +113,9 @@ static inline bool inheritsFrom(InstructionContext child, case IC_XS_OPSIZE: return inheritsFrom(child, IC_64BIT_XS_OPSIZE); case IC_64BIT_REXW: - return(inheritsFrom(child, IC_64BIT_REXW_XS) || - inheritsFrom(child, IC_64BIT_REXW_XD) || - 
inheritsFrom(child, IC_64BIT_REXW_OPSIZE) || + return((noPrefix && inheritsFrom(child, IC_64BIT_REXW_XS, noPrefix)) || + (noPrefix && inheritsFrom(child, IC_64BIT_REXW_XD, noPrefix)) || + (noPrefix && inheritsFrom(child, IC_64BIT_REXW_OPSIZE, noPrefix)) || (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE))); case IC_64BIT_OPSIZE: return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) || @@ -133,20 +134,20 @@ static inline bool inheritsFrom(InstructionContext child, case IC_64BIT_REXW_ADSIZE: return false; case IC_VEX: - return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W)) || - inheritsFrom(child, IC_VEX_W) || + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W)) || + (VEX_WIG && inheritsFrom(child, IC_VEX_W)) || (VEX_LIG && inheritsFrom(child, IC_VEX_L)); case IC_VEX_XS: - return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XS)) || - inheritsFrom(child, IC_VEX_W_XS) || + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XS)) || + (VEX_WIG && inheritsFrom(child, IC_VEX_W_XS)) || (VEX_LIG && inheritsFrom(child, IC_VEX_L_XS)); case IC_VEX_XD: - return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_XD)) || - inheritsFrom(child, IC_VEX_W_XD) || + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XD)) || + (VEX_WIG && inheritsFrom(child, IC_VEX_W_XD)) || (VEX_LIG && inheritsFrom(child, IC_VEX_L_XD)); case IC_VEX_OPSIZE: - return (VEX_LIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) || - inheritsFrom(child, IC_VEX_W_OPSIZE) || + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) || + (VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) || (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)); case IC_VEX_W: return VEX_LIG && inheritsFrom(child, IC_VEX_L_W); @@ -157,193 +158,392 @@ static inline bool inheritsFrom(InstructionContext child, case IC_VEX_W_OPSIZE: return VEX_LIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE); case IC_VEX_L: - return inheritsFrom(child, IC_VEX_L_W); + return VEX_WIG && inheritsFrom(child, IC_VEX_L_W); case IC_VEX_L_XS: - return inheritsFrom(child, IC_VEX_L_W_XS); + return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XS); case IC_VEX_L_XD: - return inheritsFrom(child, IC_VEX_L_W_XD); + return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XD); case IC_VEX_L_OPSIZE: - return inheritsFrom(child, IC_VEX_L_W_OPSIZE); + return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE); case IC_VEX_L_W: case IC_VEX_L_W_XS: case IC_VEX_L_W_XD: case IC_VEX_L_W_OPSIZE: return false; case IC_EVEX: - return inheritsFrom(child, IC_EVEX_W) || - inheritsFrom(child, IC_EVEX_L_W); + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2)); case IC_EVEX_XS: - return inheritsFrom(child, IC_EVEX_W_XS) || - inheritsFrom(child, IC_EVEX_L_W_XS); + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS)); case IC_EVEX_XD: - return inheritsFrom(child, IC_EVEX_W_XD) || - inheritsFrom(child, IC_EVEX_L_W_XD); + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD)) || + (VEX_LIG && inheritsFrom(child, 
IC_EVEX_L_XD)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD)); case IC_EVEX_OPSIZE: - return inheritsFrom(child, IC_EVEX_W_OPSIZE) || - inheritsFrom(child, IC_EVEX_L_W_OPSIZE); - case IC_EVEX_B: - return false; + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE)); + case IC_EVEX_K: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_K)); + case IC_EVEX_XS_K: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_K)); + case IC_EVEX_XD_K: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_K)); + case IC_EVEX_OPSIZE_K: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_K)); + case IC_EVEX_KZ: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_KZ)); + case IC_EVEX_XS_KZ: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_KZ)); + case IC_EVEX_XD_KZ: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_KZ)); + case IC_EVEX_OPSIZE_KZ: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_KZ)); case IC_EVEX_W: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W)); case IC_EVEX_W_XS: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS)); case IC_EVEX_W_XD: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD)); case 
IC_EVEX_W_OPSIZE: - return false; + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE)); + case IC_EVEX_W_K: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_K)); + case IC_EVEX_W_XS_K: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K)); + case IC_EVEX_W_XD_K: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K)); + case IC_EVEX_W_OPSIZE_K: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K)); + case IC_EVEX_W_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_KZ)); + case IC_EVEX_W_XS_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ)); + case IC_EVEX_W_XD_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ)); + case IC_EVEX_W_OPSIZE_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ)); case IC_EVEX_L: - case IC_EVEX_L_K_B: - case IC_EVEX_L_KZ_B: - case IC_EVEX_L_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W); case IC_EVEX_L_XS: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS); case IC_EVEX_L_XD: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD); case IC_EVEX_L_OPSIZE: - return false; + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE); + case IC_EVEX_L_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K); + case IC_EVEX_L_XS_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K); + case IC_EVEX_L_XD_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K); + case IC_EVEX_L_OPSIZE_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K); + case IC_EVEX_L_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ); + case IC_EVEX_L_XS_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ); + case IC_EVEX_L_XD_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ); + case IC_EVEX_L_OPSIZE_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ); case IC_EVEX_L_W: case IC_EVEX_L_W_XS: case IC_EVEX_L_W_XD: case IC_EVEX_L_W_OPSIZE: return false; + case IC_EVEX_L_W_K: + case IC_EVEX_L_W_XS_K: + case IC_EVEX_L_W_XD_K: + case IC_EVEX_L_W_OPSIZE_K: + return false; + case IC_EVEX_L_W_KZ: + case IC_EVEX_L_W_XS_KZ: + case IC_EVEX_L_W_XD_KZ: + case IC_EVEX_L_W_OPSIZE_KZ: + return false; case IC_EVEX_L2: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W); case IC_EVEX_L2_XS: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS); case IC_EVEX_L2_XD: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD); case IC_EVEX_L2_OPSIZE: - return false; + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE); + case IC_EVEX_L2_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K); + case IC_EVEX_L2_XS_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K); + case IC_EVEX_L2_XD_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K); + case IC_EVEX_L2_OPSIZE_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K); + case IC_EVEX_L2_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ); + case IC_EVEX_L2_XS_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ); + case IC_EVEX_L2_XD_KZ: + 
return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ); + case IC_EVEX_L2_OPSIZE_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ); case IC_EVEX_L2_W: case IC_EVEX_L2_W_XS: case IC_EVEX_L2_W_XD: case IC_EVEX_L2_W_OPSIZE: return false; - case IC_EVEX_K: - return inheritsFrom(child, IC_EVEX_W_K) || - inheritsFrom(child, IC_EVEX_L_W_K); - case IC_EVEX_XS_K: - case IC_EVEX_XS_K_B: - case IC_EVEX_XS_KZ_B: - return inheritsFrom(child, IC_EVEX_W_XS_K) || - inheritsFrom(child, IC_EVEX_L_W_XS_K); - case IC_EVEX_XD_K: - case IC_EVEX_XD_K_B: - case IC_EVEX_XD_KZ_B: - return inheritsFrom(child, IC_EVEX_W_XD_K) || - inheritsFrom(child, IC_EVEX_L_W_XD_K); + case IC_EVEX_L2_W_K: + case IC_EVEX_L2_W_XS_K: + case IC_EVEX_L2_W_XD_K: + case IC_EVEX_L2_W_OPSIZE_K: + return false; + case IC_EVEX_L2_W_KZ: + case IC_EVEX_L2_W_XS_KZ: + case IC_EVEX_L2_W_XD_KZ: + case IC_EVEX_L2_W_OPSIZE_KZ: + return false; + case IC_EVEX_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_B)); case IC_EVEX_XS_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_B)); case IC_EVEX_XD_B: - case IC_EVEX_K_B: - case IC_EVEX_KZ: - return false; - case IC_EVEX_XS_KZ: - return inheritsFrom(child, IC_EVEX_W_XS_KZ) || - inheritsFrom(child, IC_EVEX_L_W_XS_KZ); - case IC_EVEX_XD_KZ: - return inheritsFrom(child, IC_EVEX_W_XD_KZ) || - inheritsFrom(child, IC_EVEX_L_W_XD_KZ); - case IC_EVEX_KZ_B: - case IC_EVEX_OPSIZE_K: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_B)); case IC_EVEX_OPSIZE_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_B)); + case IC_EVEX_K_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_K_B)); + case IC_EVEX_XS_K_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_K_B)); + case IC_EVEX_XD_K_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_K_B)); case IC_EVEX_OPSIZE_K_B: 
- case IC_EVEX_OPSIZE_KZ: + return (VEX_LIG && VEX_WIG && + inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B)) || + (VEX_LIG && VEX_WIG && + inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_K_B)); + case IC_EVEX_KZ_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_KZ_B)); + case IC_EVEX_XS_KZ_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_KZ_B)); + case IC_EVEX_XD_KZ_B: + return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B)) || + (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_KZ_B)); case IC_EVEX_OPSIZE_KZ_B: - return false; - case IC_EVEX_W_K: + return (VEX_LIG && VEX_WIG && + inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B)) || + (VEX_LIG && VEX_WIG && + inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B)) || + (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_KZ_B)); case IC_EVEX_W_B: - case IC_EVEX_W_K_B: - case IC_EVEX_W_KZ_B: - case IC_EVEX_W_XS_K: - case IC_EVEX_W_XD_K: - case IC_EVEX_W_OPSIZE_K: - case IC_EVEX_W_OPSIZE_B: - case IC_EVEX_W_OPSIZE_K_B: - return false; - case IC_EVEX_L_K: - case IC_EVEX_L_XS_K: - case IC_EVEX_L_XD_K: - case IC_EVEX_L_XD_B: - case IC_EVEX_L_XD_K_B: - case IC_EVEX_L_OPSIZE_K: - case IC_EVEX_L_OPSIZE_B: - case IC_EVEX_L_OPSIZE_K_B: - return false; - case IC_EVEX_W_KZ: - case IC_EVEX_W_XS_KZ: - case IC_EVEX_W_XD_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_B)); case IC_EVEX_W_XS_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B)); case IC_EVEX_W_XD_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B)); + case IC_EVEX_W_OPSIZE_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B)); + case IC_EVEX_W_K_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_K_B)); case IC_EVEX_W_XS_K_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B)); case IC_EVEX_W_XD_K_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B)); + case IC_EVEX_W_OPSIZE_K_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B)); + case IC_EVEX_W_KZ_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B)); case 
IC_EVEX_W_XS_KZ_B: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B)); case IC_EVEX_W_XD_KZ_B: - case IC_EVEX_W_OPSIZE_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B)); case IC_EVEX_W_OPSIZE_KZ_B: - return false; - case IC_EVEX_L_KZ: - case IC_EVEX_L_XS_KZ: + return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B)) || + (VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B)); + case IC_EVEX_L_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_B); case IC_EVEX_L_XS_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B); + case IC_EVEX_L_XD_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B); + case IC_EVEX_L_OPSIZE_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B); + case IC_EVEX_L_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K_B); case IC_EVEX_L_XS_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B); + case IC_EVEX_L_XD_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B); + case IC_EVEX_L_OPSIZE_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B); + case IC_EVEX_L_KZ_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B); case IC_EVEX_L_XS_KZ_B: - case IC_EVEX_L_XD_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B); case IC_EVEX_L_XD_KZ_B: - case IC_EVEX_L_OPSIZE_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B); case IC_EVEX_L_OPSIZE_KZ_B: - return false; - case IC_EVEX_L_W_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B); case IC_EVEX_L_W_B: - case IC_EVEX_L_W_K_B: - case IC_EVEX_L_W_XS_K: case IC_EVEX_L_W_XS_B: - case IC_EVEX_L_W_XS_K_B: - case IC_EVEX_L_W_XS_KZ: - case IC_EVEX_L_W_XS_KZ_B: - case IC_EVEX_L_W_OPSIZE_K: + case IC_EVEX_L_W_XD_B: case IC_EVEX_L_W_OPSIZE_B: + return false; + case IC_EVEX_L_W_K_B: + case IC_EVEX_L_W_XS_K_B: + case IC_EVEX_L_W_XD_K_B: case IC_EVEX_L_W_OPSIZE_K_B: - case IC_EVEX_L_W_KZ: + return false; case IC_EVEX_L_W_KZ_B: - case IC_EVEX_L_W_XD_K: - case IC_EVEX_L_W_XD_B: - case IC_EVEX_L_W_XD_K_B: - case IC_EVEX_L_W_XD_KZ: + case IC_EVEX_L_W_XS_KZ_B: case IC_EVEX_L_W_XD_KZ_B: - case IC_EVEX_L_W_OPSIZE_KZ: case IC_EVEX_L_W_OPSIZE_KZ_B: return false; - case IC_EVEX_L2_K: case IC_EVEX_L2_B: - case IC_EVEX_L2_K_B: - case IC_EVEX_L2_KZ_B: - case IC_EVEX_L2_XS_K: - case IC_EVEX_L2_XS_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_B); case IC_EVEX_L2_XS_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B); case IC_EVEX_L2_XD_B: - case IC_EVEX_L2_XD_K: - case IC_EVEX_L2_XD_K_B: - case IC_EVEX_L2_OPSIZE_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B); case IC_EVEX_L2_OPSIZE_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B); + case IC_EVEX_L2_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B); + case IC_EVEX_L2_XS_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B); + case IC_EVEX_L2_XD_K_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B); case IC_EVEX_L2_OPSIZE_K_B: - case IC_EVEX_L2_KZ: - case IC_EVEX_L2_XS_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B); + case IC_EVEX_L2_KZ_B: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B); case IC_EVEX_L2_XS_KZ_B: - case IC_EVEX_L2_XD_KZ: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B); case IC_EVEX_L2_XD_KZ_B: - case IC_EVEX_L2_OPSIZE_KZ: + return VEX_WIG && inheritsFrom(child, 
IC_EVEX_L2_W_XD_KZ_B); case IC_EVEX_L2_OPSIZE_KZ_B: - return false; - case IC_EVEX_L2_W_K: + return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B); case IC_EVEX_L2_W_B: - case IC_EVEX_L2_W_K_B: - case IC_EVEX_L2_W_KZ_B: - case IC_EVEX_L2_W_XS_K: case IC_EVEX_L2_W_XS_B: - case IC_EVEX_L2_W_XS_K_B: - case IC_EVEX_L2_W_XD_K: case IC_EVEX_L2_W_XD_B: - case IC_EVEX_L2_W_OPSIZE_K: case IC_EVEX_L2_W_OPSIZE_B: + return false; + case IC_EVEX_L2_W_K_B: + case IC_EVEX_L2_W_XS_K_B: + case IC_EVEX_L2_W_XD_K_B: case IC_EVEX_L2_W_OPSIZE_K_B: - case IC_EVEX_L2_W_KZ: - case IC_EVEX_L2_W_XS_KZ: + return false; + case IC_EVEX_L2_W_KZ_B: case IC_EVEX_L2_W_XS_KZ_B: - case IC_EVEX_L2_W_XD_KZ: - case IC_EVEX_L2_W_XD_K_B: case IC_EVEX_L2_W_XD_KZ_B: - case IC_EVEX_L2_W_OPSIZE_KZ: case IC_EVEX_L2_W_OPSIZE_KZ_B: return false; default: @@ -908,7 +1108,9 @@ void DisassemblerTables::setTableFields(OpcodeType type, const ModRMFilter &filter, InstrUID uid, bool is32bit, + bool noPrefix, bool ignoresVEX_L, + bool ignoresVEX_W, unsigned addressSize) { ContextDecision &decision = *Tables[type]; @@ -919,8 +1121,8 @@ void DisassemblerTables::setTableFields(OpcodeType type, bool adSize64 = addressSize == 64; if (inheritsFrom((InstructionContext)index, - InstructionSpecifiers[uid].insnContext, ignoresVEX_L, - adSize64)) + InstructionSpecifiers[uid].insnContext, noPrefix, + ignoresVEX_L, ignoresVEX_W, adSize64)) setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], filter, uid, diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h index 1171c7980f42..552bbe95f7cd 100644 --- a/utils/TableGen/X86DisassemblerTables.h +++ b/utils/TableGen/X86DisassemblerTables.h @@ -244,7 +244,9 @@ public: /// correspond to the desired instruction. /// @param uid - The unique ID of the instruction. /// @param is32bit - Instructon is only 32-bit + /// @param noPrefix - Instruction record has no prefix. /// @param ignoresVEX_L - Instruction ignores VEX.L + /// @param ignoresVEX_W - Instruction ignores VEX.W /// @param AddrSize - Instructions address size 16/32/64. 0 is unspecified void setTableFields(OpcodeType type, InstructionContext insnContext, @@ -252,7 +254,9 @@ public: const ModRMFilter &filter, InstrUID uid, bool is32bit, + bool noPrefix, bool ignoresVEX_L, + bool ignoresVEX_W, unsigned AddrSize); /// specForUID - Returns the instruction specifier for a given unique diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp index 07b96b03b01c..05f30facd547 100644 --- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -137,6 +137,9 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table, {"VBROADCASTSDZ256m", "VBROADCASTSDYrm", false}, {"VBROADCASTSDZ256r", "VBROADCASTSDYrr", false}, + {"VBROADCASTF64X2Z128rm", "VBROADCASTF128", false}, + {"VBROADCASTI64X2Z128rm", "VBROADCASTI128", false}, + {"VEXTRACTF64x2Z256mr", "VEXTRACTF128mr", false}, {"VEXTRACTF64x2Z256rr", "VEXTRACTF128rr", false}, {"VEXTRACTI64x2Z256mr", "VEXTRACTI128mr", false}, @@ -145,7 +148,21 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table, {"VINSERTF64x2Z256rm", "VINSERTF128rm", false}, {"VINSERTF64x2Z256rr", "VINSERTF128rr", false}, {"VINSERTI64x2Z256rm", "VINSERTI128rm", false}, - {"VINSERTI64x2Z256rr", "VINSERTI128rr", false} + {"VINSERTI64x2Z256rr", "VINSERTI128rr", false}, + + // These will require some custom adjustment in the conversion pass. 
+ {"VALIGNDZ128rri", "VPALIGNRrri", true}, + {"VALIGNQZ128rri", "VPALIGNRrri", true}, + {"VALIGNDZ128rmi", "VPALIGNRrmi", true}, + {"VALIGNQZ128rmi", "VPALIGNRrmi", true}, + {"VSHUFF32X4Z256rmi", "VPERM2F128rm", false}, + {"VSHUFF32X4Z256rri", "VPERM2F128rr", false}, + {"VSHUFF64X2Z256rmi", "VPERM2F128rm", false}, + {"VSHUFF64X2Z256rri", "VPERM2F128rr", false}, + {"VSHUFI32X4Z256rmi", "VPERM2I128rm", false}, + {"VSHUFI32X4Z256rri", "VPERM2I128rr", false}, + {"VSHUFI64X2Z256rmi", "VPERM2I128rm", false}, + {"VSHUFI64X2Z256rri", "VPERM2I128rr", false}, }; // Print the manually added entries diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp new file mode 100644 index 000000000000..ff1afa89efc8 --- /dev/null +++ b/utils/TableGen/X86FoldTablesEmitter.cpp @@ -0,0 +1,661 @@ +//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend is responsible for emitting the memory fold tables of +// the X86 backend instructions. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "X86RecognizableInstr.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/TableGenBackend.h" + +using namespace llvm; + +namespace { + +// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the +// manual added entries. +enum UnfoldStrategy { + UNFOLD, // Allow unfolding + NO_UNFOLD, // Prevent unfolding + NO_STRATEGY // Make decision according to operands' sizes +}; + +// Represents an entry in the manual mapped instructions set. +struct ManualMapEntry { + const char *RegInstStr; + const char *MemInstStr; + UnfoldStrategy Strategy; + + ManualMapEntry(const char *RegInstStr, const char *MemInstStr, + UnfoldStrategy Strategy = NO_STRATEGY) + : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {} +}; + +class IsMatch; + +// List of instructions requiring explicitly aligned memory. +const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", + "MOVNTPD", "MOVNTDQ", "MOVNTDQA"}; + +// List of instructions NOT requiring explicit memory alignment. +const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD"}; + +// For manually mapping instructions that do not match by their encoding. 
+const ManualMapEntry ManualMapSet[] = { + { "ADD16ri_DB", "ADD16mi", NO_UNFOLD }, + { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16mr", NO_UNFOLD }, + { "ADD32ri_DB", "ADD32mi", NO_UNFOLD }, + { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32mr", NO_UNFOLD }, + { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD }, + { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64mr", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16rm", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32rm", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64rm", NO_UNFOLD }, + { "PUSH16r", "PUSH16rmm", NO_UNFOLD }, + { "PUSH32r", "PUSH32rmm", NO_UNFOLD }, + { "PUSH64r", "PUSH64rmm", NO_UNFOLD }, + { "TAILJMPr", "TAILJMPm", UNFOLD }, + { "TAILJMPr64", "TAILJMPm64", UNFOLD }, + { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD }, +}; + + +static bool isExplicitAlign(const CodeGenInstruction *Inst) { + return any_of(ExplicitAlign, [Inst](const char *InstStr) { + return Inst->TheDef->getName().find(InstStr) != StringRef::npos; + }); +} + +static bool isExplicitUnalign(const CodeGenInstruction *Inst) { + return any_of(ExplicitUnalign, [Inst](const char *InstStr) { + return Inst->TheDef->getName().find(InstStr) != StringRef::npos; + }); +} + +class X86FoldTablesEmitter { + RecordKeeper &Records; + CodeGenTarget Target; + + // Represents an entry in the folding table + class X86FoldTableEntry { + const CodeGenInstruction *RegInst; + const CodeGenInstruction *MemInst; + + public: + bool CannotUnfold = false; + bool IsLoad = false; + bool IsStore = false; + bool IsAligned = false; + unsigned int Alignment = 0; + + X86FoldTableEntry(const CodeGenInstruction *RegInst, + const CodeGenInstruction *MemInst) + : RegInst(RegInst), MemInst(MemInst) {} + + friend raw_ostream &operator<<(raw_ostream &OS, + const X86FoldTableEntry &E) { + OS << "{ X86::" << E.RegInst->TheDef->getName().str() + << ", X86::" << E.MemInst->TheDef->getName().str() << ", "; + + if (E.IsLoad) + OS << "TB_FOLDED_LOAD | "; + if (E.IsStore) + OS << "TB_FOLDED_STORE | "; + if (E.CannotUnfold) + OS << "TB_NO_REVERSE | "; + if (E.IsAligned) + OS << "TB_ALIGN_" << E.Alignment << " | "; + + OS << "0 },\n"; + + return OS; + } + }; + + typedef std::vector<X86FoldTableEntry> FoldTable; + // std::vector for each folding table. + // Table2Addr - Holds instructions which their memory form performs load+store + // Table#i - Holds instructions which the their memory form perform a load OR + // a store, and their #i'th operand is folded. + FoldTable Table2Addr; + FoldTable Table0; + FoldTable Table1; + FoldTable Table2; + FoldTable Table3; + FoldTable Table4; + +public: + X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} + + // run - Generate the 6 X86 memory fold tables. + void run(raw_ostream &OS); + +private: + // Decides to which table to add the entry with the given instructions. + // S sets the strategy of adding the TB_NO_REVERSE flag. + void updateTables(const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S = NO_STRATEGY); + + // Generates X86FoldTableEntry with the given instructions and fill it with + // the appropriate flags - then adds it to Table. + void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S, const unsigned int FoldedInd); + + // Print the given table as a static const C++ array of type + // X86MemoryFoldTableEntry. 
+ void printTable(const FoldTable &Table, std::string TableName, + raw_ostream &OS) { + OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName + << "[] = {\n"; + + for (const X86FoldTableEntry &E : Table) + OS << E; + + OS << "};\n"; + } +}; + +// Return true if one of the instruction's operands is a RST register class +static bool hasRSTRegClass(const CodeGenInstruction *Inst) { + return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { + return OpIn.Rec->getName() == "RST"; + }); +} + +// Return true if one of the instruction's operands is a ptr_rc_tailcall +static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) { + return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { + return OpIn.Rec->getName() == "ptr_rc_tailcall"; + }); +} + +// Calculates the integer value representing the BitsInit object +static inline uint64_t getValueFromBitsInit(const BitsInit *B) { + assert(B->getNumBits() <= sizeof(uint64_t) * 8 && "BitInits' too long!"); + + uint64_t Value = 0; + for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) { + BitInit *Bit = cast<BitInit>(B->getBit(i)); + Value |= uint64_t(Bit->getValue()) << i; + } + return Value; +} + +// Returns true if the two given BitsInits represent the same integer value +static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) { + if (B1->getNumBits() != B2->getNumBits()) + PrintFatalError("Comparing two BitsInits with different sizes!"); + + for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) { + BitInit *Bit1 = cast<BitInit>(B1->getBit(i)); + BitInit *Bit2 = cast<BitInit>(B2->getBit(i)); + if (Bit1->getValue() != Bit2->getValue()) + return false; + } + return true; +} + +// Return the size of the register operand +static inline unsigned int getRegOperandSize(const Record *RegRec) { + if (RegRec->isSubClassOf("RegisterOperand")) + RegRec = RegRec->getValueAsDef("RegClass"); + if (RegRec->isSubClassOf("RegisterClass")) + return RegRec->getValueAsListOfDefs("RegTypes")[0]->getValueAsInt("Size"); + + llvm_unreachable("Register operand's size not known!"); +} + +// Return the size of the memory operand +static inline unsigned int +getMemOperandSize(const Record *MemRec, const bool IntrinsicSensitive = false) { + if (MemRec->isSubClassOf("Operand")) { + // Intrinsic memory instructions use ssmem/sdmem. + if (IntrinsicSensitive && + (MemRec->getName() == "sdmem" || MemRec->getName() == "ssmem")) + return 128; + + StringRef Name = + MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name"); + if (Name == "Mem8") + return 8; + if (Name == "Mem16") + return 16; + if (Name == "Mem32") + return 32; + if (Name == "Mem64") + return 64; + if (Name == "Mem80") + return 80; + if (Name == "Mem128") + return 128; + if (Name == "Mem256") + return 256; + if (Name == "Mem512") + return 512; + } + + llvm_unreachable("Memory operand's size not known!"); +} + +// Returns true if the record's list of defs includes the given def. +static inline bool hasDefInList(const Record *Rec, const StringRef List, + const StringRef Def) { + if (!Rec->isValueUnset(List)) { + return any_of(*(Rec->getValueAsListInit(List)), + [Def](const Init *I) { return I->getAsString() == Def; }); + } + return false; +} + +// Return true if the instruction defined as a register flavor. 
+static inline bool hasRegisterFormat(const Record *Inst) { + const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits"); + uint64_t FormBitsNum = getValueFromBitsInit(FormBits); + + // Values from X86Local namespace defined in X86RecognizableInstr.cpp + return FormBitsNum >= X86Local::MRMDestReg && FormBitsNum <= X86Local::MRM7r; +} + +// Return true if the instruction defined as a memory flavor. +static inline bool hasMemoryFormat(const Record *Inst) { + const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits"); + uint64_t FormBitsNum = getValueFromBitsInit(FormBits); + + // Values from X86Local namespace defined in X86RecognizableInstr.cpp + return FormBitsNum >= X86Local::MRMDestMem && FormBitsNum <= X86Local::MRM7m; +} + +static inline bool isNOREXRegClass(const Record *Op) { + return Op->getName().find("_NOREX") != StringRef::npos; +} + +static inline bool isRegisterOperand(const Record *Rec) { + return Rec->isSubClassOf("RegisterClass") || + Rec->isSubClassOf("RegisterOperand") || + Rec->isSubClassOf("PointerLikeRegClass"); +} + +static inline bool isMemoryOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; +} + +static inline bool isImmediateOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; +} + +// Get the alternative instruction pointed by "FoldGenRegForm" field. +static inline const CodeGenInstruction * +getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records, + const CodeGenTarget &Target) { + + StringRef AltRegInstStr = I->TheDef->getValueAsString("FoldGenRegForm"); + Record *AltRegInstRec = Records.getDef(AltRegInstStr); + assert(AltRegInstRec && + "Alternative register form instruction def not found"); + CodeGenInstruction &AltRegInst = Target.getInstruction(AltRegInstRec); + return &AltRegInst; +} + +// Function object - Operator() returns true if the given VEX instruction +// matches the EVEX instruction of this object. +class IsMatch { + const CodeGenInstruction *MemInst; + +public: + IsMatch(const CodeGenInstruction *Inst, const RecordKeeper &Records) + : MemInst(Inst) {} + + bool operator()(const CodeGenInstruction *RegInst) { + Record *MemRec = MemInst->TheDef; + Record *RegRec = RegInst->TheDef; + + // Return false if one (at least) of the encoding fields of both + // instructions do not match. 
+ if (RegRec->getValueAsDef("OpEnc") != MemRec->getValueAsDef("OpEnc") || + !equalBitsInits(RegRec->getValueAsBitsInit("Opcode"), + MemRec->getValueAsBitsInit("Opcode")) || + // VEX/EVEX fields + RegRec->getValueAsDef("OpPrefix") != + MemRec->getValueAsDef("OpPrefix") || + RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") || + RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") || + RegRec->getValueAsBit("hasVEX_4V") != + MemRec->getValueAsBit("hasVEX_4V") || + RegRec->getValueAsBit("hasEVEX_K") != + MemRec->getValueAsBit("hasEVEX_K") || + RegRec->getValueAsBit("hasEVEX_Z") != + MemRec->getValueAsBit("hasEVEX_Z") || + RegRec->getValueAsBit("hasEVEX_B") != + MemRec->getValueAsBit("hasEVEX_B") || + RegRec->getValueAsBit("hasEVEX_RC") != + MemRec->getValueAsBit("hasEVEX_RC") || + RegRec->getValueAsBit("hasREX_WPrefix") != + MemRec->getValueAsBit("hasREX_WPrefix") || + RegRec->getValueAsBit("hasLockPrefix") != + MemRec->getValueAsBit("hasLockPrefix") || + !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"), + MemRec->getValueAsBitsInit("EVEX_LL")) || + !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"), + MemRec->getValueAsBitsInit("VEX_WPrefix")) || + // Instruction's format - The register form's "Form" field should be + // the opposite of the memory form's "Form" field. + !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"), + MemRec->getValueAsBitsInit("FormBits")) || + RegRec->getValueAsBit("isAsmParserOnly") != + MemRec->getValueAsBit("isAsmParserOnly")) + return false; + + // Make sure the sizes of the operands of both instructions suit each other. + // This is needed for instructions with intrinsic version (_Int). + // Where the only difference is the size of the operands. + // For example: VUCOMISDZrm and Int_VUCOMISDrm + // Also for instructions that their EVEX version was upgraded to work with + // k-registers. For example VPCMPEQBrm (xmm output register) and + // VPCMPEQBZ128rm (k register output register). + bool ArgFolded = false; + unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + + // Instructions with one output in their memory form use the memory folded + // operand as source and destination (Read-Modify-Write). + unsigned RegStartIdx = + (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0; + + for (unsigned i = 0, e = MemInst->Operands.size(); i < e; i++) { + Record *MemOpRec = MemInst->Operands[i].Rec; + Record *RegOpRec = RegInst->Operands[i + RegStartIdx].Rec; + + if (MemOpRec == RegOpRec) + continue; + + if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec)) { + if (getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec) || + isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec)) + return false; + } else if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec)) { + if (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)) + return false; + } else if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec)) { + if (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")) + return false; + } else { + // Only one operand can be folded. 
+ if (ArgFolded) + return false; + + assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)); + ArgFolded = true; + } + } + + return true; + } + +private: + // Return true of the 2 given forms are the opposite of each other. + bool areOppositeForms(const BitsInit *RegFormBits, + const BitsInit *MemFormBits) { + uint64_t MemFormNum = getValueFromBitsInit(MemFormBits); + uint64_t RegFormNum = getValueFromBitsInit(RegFormBits); + + if ((MemFormNum == X86Local::MRM0m && RegFormNum == X86Local::MRM0r) || + (MemFormNum == X86Local::MRM1m && RegFormNum == X86Local::MRM1r) || + (MemFormNum == X86Local::MRM2m && RegFormNum == X86Local::MRM2r) || + (MemFormNum == X86Local::MRM3m && RegFormNum == X86Local::MRM3r) || + (MemFormNum == X86Local::MRM4m && RegFormNum == X86Local::MRM4r) || + (MemFormNum == X86Local::MRM5m && RegFormNum == X86Local::MRM5r) || + (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) || + (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) || + (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) || + (MemFormNum == X86Local::MRMDestMem && + RegFormNum == X86Local::MRMDestReg) || + (MemFormNum == X86Local::MRMSrcMem && + RegFormNum == X86Local::MRMSrcReg) || + (MemFormNum == X86Local::MRMSrcMem4VOp3 && + RegFormNum == X86Local::MRMSrcReg4VOp3) || + (MemFormNum == X86Local::MRMSrcMemOp4 && + RegFormNum == X86Local::MRMSrcRegOp4)) + return true; + + return false; + } +}; + +} // end anonymous namespace + +void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table, + const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S, + const unsigned int FoldedInd) { + + X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr); + Record *RegRec = RegInstr->TheDef; + Record *MemRec = MemInstr->TheDef; + + // Only table0 entries should explicitly specify a load or store flag. + if (&Table == &Table0) { + unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + // If the instruction writes to the folded operand, it will appear as an + // output in the register form instruction and as an input in the memory + // form instruction. + // If the instruction reads from the folded operand, it well appear as in + // input in both forms. + if (MemInOpsNum == RegInOpsNum) + Result.IsLoad = true; + else + Result.IsStore = true; + } + + Record *RegOpRec = RegInstr->Operands[FoldedInd].Rec; + Record *MemOpRec = MemInstr->Operands[FoldedInd].Rec; + + // Unfolding code generates a load/store instruction according to the size of + // the register in the register form instruction. + // If the register's size is greater than the memory's operand size, do not + // allow unfolding. + if (S == UNFOLD) + Result.CannotUnfold = false; + else if (S == NO_UNFOLD) + Result.CannotUnfold = true; + else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec)) + Result.CannotUnfold = true; // S == NO_STRATEGY + + uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits")); + if (isExplicitAlign(RegInstr)) { + // The instruction require explicitly aligned memory. + BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize"); + uint64_t Value = getValueFromBitsInit(VectSize); + Result.IsAligned = true; + Result.Alignment = Value; + } else if (Enc != X86Local::XOP && Enc != X86Local::VEX && + Enc != X86Local::EVEX) { + // Instructions with VEX encoding do not require alignment. 
+ if (!isExplicitUnalign(RegInstr) && getMemOperandSize(MemOpRec) > 64) { + // SSE packed vector instructions require a 16 byte alignment. + Result.IsAligned = true; + Result.Alignment = 16; + } + } + + Table.push_back(Result); +} + +void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S) { + + Record *RegRec = RegInstr->TheDef; + Record *MemRec = MemInstr->TheDef; + unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + + // Instructions which have the WriteRMW value (Read-Modify-Write) should be + // added to Table2Addr. + if (hasDefInList(MemRec, "SchedRW", "WriteRMW") && MemOutSize != RegOutSize && + MemInSize == RegInSize) { + addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0); + return; + } + + if (MemInSize == RegInSize && MemOutSize == RegOutSize) { + // Load-Folding cases. + // If the i'th register form operand is a register and the i'th memory form + // operand is a memory operand, add instructions to Table#i. + for (unsigned i = RegOutSize, e = RegInstr->Operands.size(); i < e; i++) { + Record *RegOpRec = RegInstr->Operands[i].Rec; + Record *MemOpRec = MemInstr->Operands[i].Rec; + if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) { + switch (i) { + case 0: + addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0); + return; + case 1: + addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1); + return; + case 2: + addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2); + return; + case 3: + addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3); + return; + case 4: + addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4); + return; + } + } + } + } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) { + // Store-Folding cases. + // If the memory form instruction performs performs a store, the *output* + // register of the register form instructions disappear and instead a + // memory *input* operand appears in the memory form instruction. + // For example: + // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src) + // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src) + Record *RegOpRec = RegInstr->Operands[RegOutSize - 1].Rec; + Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec; + if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) + addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0); + } + + return; +} + +void X86FoldTablesEmitter::run(raw_ostream &OS) { + emitSourceFileHeader("X86 fold tables", OS); + + // Holds all memory instructions + std::vector<const CodeGenInstruction *> MemInsts; + // Holds all register instructions - divided according to opcode. + std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts; + + ArrayRef<const CodeGenInstruction *> NumberedInstructions = + Target.getInstructionsByEnumValue(); + + for (const CodeGenInstruction *Inst : NumberedInstructions) { + if (!Inst->TheDef->getNameInit() || !Inst->TheDef->isSubClassOf("X86Inst")) + continue; + + const Record *Rec = Inst->TheDef; + + // - Do not proceed if the instruction is marked as notMemoryFoldable. + // - Instructions including RST register class operands are not relevant + // for memory folding (for further details check the explanation in + // lib/Target/X86/X86InstrFPStack.td file). 
+ // - Some instructions (listed in the manual map above) use the register + // class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure + // safe mapping of these instruction we manually map them and exclude + // them from the automation. + if (Rec->getValueAsBit("isMemoryFoldable") == false || + hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst)) + continue; + + // Add all the memory form instructions to MemInsts, and all the register + // form instructions to RegInsts[Opc], where Opc in the opcode of each + // instructions. this helps reducing the runtime of the backend. + if (hasMemoryFormat(Rec)) + MemInsts.push_back(Inst); + else if (hasRegisterFormat(Rec)) { + uint8_t Opc = getValueFromBitsInit(Rec->getValueAsBitsInit("Opcode")); + RegInsts[Opc].push_back(Inst); + } + } + + // For each memory form instruction, try to find its register form + // instruction. + for (const CodeGenInstruction *MemInst : MemInsts) { + uint8_t Opc = + getValueFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode")); + + if (RegInsts.count(Opc) == 0) + continue; + + // Two forms (memory & register) of the same instruction must have the same + // opcode. try matching only with register form instructions with the same + // opcode. + std::vector<const CodeGenInstruction *> &OpcRegInsts = + RegInsts.find(Opc)->second; + + auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records)); + if (Match != OpcRegInsts.end()) { + const CodeGenInstruction *RegInst = *Match; + // If the matched instruction has it's "FoldGenRegForm" set, map the + // memory form instruction to the register form instruction pointed by + // this field + if (RegInst->TheDef->isValueUnset("FoldGenRegForm")) { + updateTables(RegInst, MemInst); + } else { + const CodeGenInstruction *AltRegInst = + getAltRegInst(RegInst, Records, Target); + updateTables(AltRegInst, MemInst); + } + OpcRegInsts.erase(Match); + } + } + + // Add the manually mapped instructions listed above. + for (const ManualMapEntry &Entry : ManualMapSet) { + Record *RegInstIter = Records.getDef(Entry.RegInstStr); + Record *MemInstIter = Records.getDef(Entry.MemInstStr); + + updateTables(&(Target.getInstruction(RegInstIter)), + &(Target.getInstruction(MemInstIter)), Entry.Strategy); + } + + // Print all tables to raw_ostream OS. 
+ printTable(Table2Addr, "Table2Addr", OS); + printTable(Table0, "Table0", OS); + printTable(Table1, "Table1", OS); + printTable(Table2, "Table2", OS); + printTable(Table3, "Table3", OS); + printTable(Table4, "Table4", OS); +} + +namespace llvm { + +void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) { + X86FoldTablesEmitter(RK).run(OS); +} +} // namespace llvm diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 202a71ae4dc4..9afdd7e09638 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -100,6 +100,9 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, HasVEX_LPrefix = Rec->getValueAsBit("hasVEX_L"); + EncodeRC = HasEVEX_B && + (Form == X86Local::MRMDestReg || Form == X86Local::MRMSrcReg); + // Check for 64-bit inst which does not require REX Is32Bit = false; Is64Bit = false; @@ -161,7 +164,7 @@ InstructionContext RecognizableInstr::insnContext() const { llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } // VEX_L & VEX_W - if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { + if (!EncodeRC && HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -174,7 +177,7 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (HasVEX_LPrefix) { + } else if (!EncodeRC && HasVEX_LPrefix) { // VEX_L if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_OPSIZE); @@ -188,8 +191,8 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } - else if (HasEVEX_L2Prefix && VEX_WPrefix == X86Local::VEX_W1) { + } else if (!EncodeRC && HasEVEX_L2Prefix && + VEX_WPrefix == X86Local::VEX_W1) { // EVEX_L2 & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); @@ -203,7 +206,7 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Instruction does not use a prefix: " << Name << "\n"; llvm_unreachable("Invalid prefix"); } - } else if (HasEVEX_L2Prefix) { + } else if (!EncodeRC && HasEVEX_L2Prefix) { // EVEX_L2 if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_OPSIZE); @@ -703,7 +706,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { #define MAP(from, to) \ case X86Local::MRM_##from: - OpcodeType opcodeType = (OpcodeType)-1; + llvm::Optional<OpcodeType> opcodeType; ModRMFilter* filter = nullptr; uint8_t opcodeToSet = 0; @@ -783,8 +786,7 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { case X86Local::AdSize64: AddressSize = 64; break; } - assert(opcodeType != (OpcodeType)-1 && - "Opcode type not set"); + assert(opcodeType && "Opcode type not set"); assert(filter && "Filter not set"); if (Form == X86Local::AddRegFrm) { @@ -796,17 +798,14 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + 8; ++currentOpcode) - tables.setTableFields(opcodeType, - insnContext(), - currentOpcode, - *filter, - UID, Is32Bit, IgnoresVEX_L, AddressSize); + tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter, + UID, Is32Bit, OpPrefix == 0, + IgnoresVEX_L || EncodeRC, + VEX_WPrefix == X86Local::VEX_WIG, AddressSize); } else { - 
tables.setTableFields(opcodeType, - insnContext(), - opcodeToSet, - *filter, - UID, Is32Bit, IgnoresVEX_L, AddressSize); + tables.setTableFields(*opcodeType, insnContext(), opcodeToSet, *filter, UID, + Is32Bit, OpPrefix == 0, IgnoresVEX_L || EncodeRC, + VEX_WPrefix == X86Local::VEX_WIG, AddressSize); } delete filter; @@ -929,19 +928,19 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("VK64", TYPE_VK) TYPE("VK64WM", TYPE_VK) TYPE("GR32_NOAX", TYPE_Rv) - TYPE("vx64mem", TYPE_M) - TYPE("vx128mem", TYPE_M) - TYPE("vx256mem", TYPE_M) - TYPE("vy128mem", TYPE_M) - TYPE("vy256mem", TYPE_M) - TYPE("vx64xmem", TYPE_M) - TYPE("vx128xmem", TYPE_M) - TYPE("vx256xmem", TYPE_M) - TYPE("vy128xmem", TYPE_M) - TYPE("vy256xmem", TYPE_M) - TYPE("vy512mem", TYPE_M) - TYPE("vz256xmem", TYPE_M) - TYPE("vz512mem", TYPE_M) + TYPE("vx64mem", TYPE_MVSIBX) + TYPE("vx128mem", TYPE_MVSIBX) + TYPE("vx256mem", TYPE_MVSIBX) + TYPE("vy128mem", TYPE_MVSIBY) + TYPE("vy256mem", TYPE_MVSIBY) + TYPE("vx64xmem", TYPE_MVSIBX) + TYPE("vx128xmem", TYPE_MVSIBX) + TYPE("vx256xmem", TYPE_MVSIBX) + TYPE("vy128xmem", TYPE_MVSIBY) + TYPE("vy256xmem", TYPE_MVSIBY) + TYPE("vy512mem", TYPE_MVSIBY) + TYPE("vz256xmem", TYPE_MVSIBZ) + TYPE("vz512mem", TYPE_MVSIBZ) TYPE("BNDR", TYPE_BNDR) errs() << "Unhandled type string " << s << "\n"; llvm_unreachable("Unhandled type string"); @@ -962,7 +961,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s, ENCODING("XOPCC", ENCODING_IB) ENCODING("AVXCC", ENCODING_IB) ENCODING("AVX512ICC", ENCODING_IB) - ENCODING("AVX512RC", ENCODING_IB) + ENCODING("AVX512RC", ENCODING_IRC) ENCODING("i16imm", ENCODING_Iv) ENCODING("i16i8imm", ENCODING_IB) ENCODING("i32imm", ENCODING_Iv) diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index ea99935f8790..24509d16d638 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -191,6 +191,8 @@ private: bool HasEVEX_KZ; /// The hasEVEX_B field from the record bool HasEVEX_B; + /// Indicates that the instruction uses the L and L' fields for RC. + bool EncodeRC; /// The isCodeGenOnly field from the record bool IsCodeGenOnly; /// The ForceDisassemble field from the record diff --git a/utils/docker/build_docker_image.sh b/utils/docker/build_docker_image.sh index 33f690ad5c43..ad7831925f9a 100755 --- a/utils/docker/build_docker_image.sh +++ b/utils/docker/build_docker_image.sh @@ -15,7 +15,7 @@ DOCKER_TAG="" BUILDSCRIPT_ARGS="" function show_usage() { - usage=$(cat << EOF + cat << EOF Usage: build_docker_image.sh [options] [-- [cmake_args]...] Available options: @@ -38,6 +38,9 @@ Available options: Can be specified multiple times. -i|--install-target name of a cmake install target to build and include in the resulting archive. Can be specified multiple times. + -c|--checksums name of a file, containing checksums of llvm checkout. + Script will fail if checksums of the checkout do not + match. Required options: --source and --docker-repository, at least one --install-target. 
@@ -64,10 +67,9 @@ $ ./build_docker_image.sh -s debian8 -d mydocker/clang-debian8 -t "latest" \ -DCLANG_ENABLE_BOOTSTRAP=ON \ -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers" EOF -) - echo "$usage" } +CHECKSUMS_FILE="" SEEN_INSTALL_TARGET=0 while [[ $# -gt 0 ]]; do case "$1" in @@ -97,6 +99,11 @@ while [[ $# -gt 0 ]]; do BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS $1 $2" shift 2 ;; + -c|--checksums) + shift + CHECKSUMS_FILE="$1" + shift + ;; --) shift BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -- $*" @@ -130,30 +137,41 @@ if [ $SEEN_INSTALL_TARGET -eq 0 ]; then exit 1 fi -cd $(dirname $0) -if [ ! -d $IMAGE_SOURCE ]; then - echo "No sources for '$IMAGE_SOURCE' were found in $PWD" +SOURCE_DIR=$(dirname $0) +if [ ! -d "$SOURCE_DIR/$IMAGE_SOURCE" ]; then + echo "No sources for '$IMAGE_SOURCE' were found in $SOURCE_DIR" exit 1 fi -echo "Building from $IMAGE_SOURCE" +BUILD_DIR=$(mktemp -d) +trap "rm -rf $BUILD_DIR" EXIT +echo "Using a temporary directory for the build: $BUILD_DIR" + +cp -r "$SOURCE_DIR/$IMAGE_SOURCE" "$BUILD_DIR/$IMAGE_SOURCE" +cp -r "$SOURCE_DIR/scripts" "$BUILD_DIR/scripts" + +mkdir "$BUILD_DIR/checksums" +if [ "$CHECKSUMS_FILE" != "" ]; then + cp "$CHECKSUMS_FILE" "$BUILD_DIR/checksums/checksums.txt" +fi if [ "$DOCKER_TAG" != "" ]; then DOCKER_TAG=":$DOCKER_TAG" fi +echo "Building from $IMAGE_SOURCE" echo "Building $DOCKER_REPOSITORY-build$DOCKER_TAG" docker build -t "$DOCKER_REPOSITORY-build$DOCKER_TAG" \ --build-arg "buildscript_args=$BUILDSCRIPT_ARGS" \ - -f "$IMAGE_SOURCE/build/Dockerfile" . + -f "$BUILD_DIR/$IMAGE_SOURCE/build/Dockerfile" \ + "$BUILD_DIR" echo "Copying clang installation to release image sources" -docker run -v "$PWD/$IMAGE_SOURCE:/workspace" "$DOCKER_REPOSITORY-build$DOCKER_TAG" \ +docker run -v "$BUILD_DIR/$IMAGE_SOURCE:/workspace" "$DOCKER_REPOSITORY-build$DOCKER_TAG" \ cp /tmp/clang.tar.gz /workspace/release -trap "rm -f $PWD/$IMAGE_SOURCE/release/clang.tar.gz" EXIT echo "Building release image" docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \ - "$IMAGE_SOURCE/release" + "$BUILD_DIR/$IMAGE_SOURCE/release" echo "Done" diff --git a/utils/docker/debian8/build/Dockerfile b/utils/docker/debian8/build/Dockerfile index 13a11a73be6c..5c5ed6744963 100644 --- a/utils/docker/debian8/build/Dockerfile +++ b/utils/docker/debian8/build/Dockerfile @@ -18,18 +18,30 @@ RUN grep deb /etc/apt/sources.list | \ # Install compiler, python and subversion. RUN apt-get update && \ - apt-get install -y --no-install-recommends build-essential python2.7 wget \ - subversion ninja-build && \ + apt-get install -y --no-install-recommends ca-certificates gnupg \ + build-essential python wget subversion ninja-build && \ rm -rf /var/lib/apt/lists/* -# Install cmake version that can compile clang into /usr/local. +# Import public key required for verifying signature of cmake download. +RUN gpg --keyserver hkp://pgp.mit.edu --recv 0x2D2CEF1034921684 + +# Download, verify and install cmake version that can compile clang into /usr/local. 
# (Version in debian8 repos is is too old) -RUN wget -O - "https://cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.tar.gz" | \ - tar xzf - -C /usr/local --strip-components=1 +RUN mkdir /tmp/cmake-install && cd /tmp/cmake-install && \ + wget "https://cmake.org/files/v3.7/cmake-3.7.2-SHA-256.txt.asc" && \ + wget "https://cmake.org/files/v3.7/cmake-3.7.2-SHA-256.txt" && \ + gpg --verify cmake-3.7.2-SHA-256.txt.asc cmake-3.7.2-SHA-256.txt && \ + wget "https://cmake.org/files/v3.7/cmake-3.7.2-Linux-x86_64.tar.gz" && \ + ( grep "cmake-3.7.2-Linux-x86_64.tar.gz" cmake-3.7.2-SHA-256.txt | \ + sha256sum -c - ) && \ + tar xzf cmake-3.7.2-Linux-x86_64.tar.gz -C /usr/local --strip-components=1 && \ + cd / && rm -rf /tmp/cmake-install + +ADD checksums /tmp/checksums +ADD scripts /tmp/scripts # Arguments passed to build_install_clang.sh. ARG buildscript_args # Run the build. Results of the build will be available as /tmp/clang.tar.gz. -ADD scripts/build_install_llvm.sh /tmp -RUN /tmp/build_install_llvm.sh ${buildscript_args} +RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args} diff --git a/utils/docker/debian8/release/Dockerfile b/utils/docker/debian8/release/Dockerfile index d0214b9c67af..3a44a7d41166 100644 --- a/utils/docker/debian8/release/Dockerfile +++ b/utils/docker/debian8/release/Dockerfile @@ -12,10 +12,10 @@ FROM launcher.gcr.io/google/debian8:latest LABEL maintainer "LLVM Developers" -# Install packages for minimal usefull image. +# Install packages for minimal useful image. RUN apt-get update && \ apt-get install -y --no-install-recommends libstdc++-4.9-dev binutils && \ rm -rf /var/lib/apt/lists/* # Unpack clang installation into this image. -ADD clang.tar.gz / +ADD clang.tar.gz /usr/local/ diff --git a/utils/docker/example/build/Dockerfile b/utils/docker/example/build/Dockerfile index 597ccfeb4f23..be077f59f48a 100644 --- a/utils/docker/example/build/Dockerfile +++ b/utils/docker/example/build/Dockerfile @@ -18,9 +18,11 @@ LABEL maintainer "Maintainer <maintainer@email>" # FIXME: Install llvm/clang build dependencies. Including compiler to # build stage1, cmake, subversion, ninja, etc. -# Arguments to pass to build_install_clang.sh. +ADD checksums /tmp/checksums +ADD scripts /tmp/scripts + +# Arguments passed to build_install_clang.sh. ARG buildscript_args # Run the build. Results of the build will be available as /tmp/clang.tar.gz. -ADD scripts/build_install_llvm.sh /tmp -RUN /tmp/build_install_llvm.sh ${buildscript_args} +RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args} diff --git a/utils/docker/example/release/Dockerfile b/utils/docker/example/release/Dockerfile index 953d81fc9951..b088ad885ac5 100644 --- a/utils/docker/example/release/Dockerfile +++ b/utils/docker/example/release/Dockerfile @@ -17,8 +17,8 @@ FROM ubuntu LABEL maintainer "Maintainer <maintainer@email>" # FIXME: Install all packages you want to have in your release container. -# A minimal usefull installation must include libstdc++ and binutils. +# A minimal useful installation must include libstdc++ and binutils. # Unpack clang installation into this container. # It is copied to this directory by build_docker_image.sh script. -ADD clang.tar.gz / +ADD clang.tar.gz /usr/local/ diff --git a/utils/docker/nvidia-cuda/build/Dockerfile b/utils/docker/nvidia-cuda/build/Dockerfile index 619b80cbb61a..cd353a2578bd 100644 --- a/utils/docker/nvidia-cuda/build/Dockerfile +++ b/utils/docker/nvidia-cuda/build/Dockerfile @@ -17,9 +17,15 @@ ARG buildscript_args # Install llvm build dependencies. 
RUN apt-get update && \ - apt-get install -y --no-install-recommends cmake python2.7 subversion ninja-build && \ + apt-get install -y --no-install-recommends ca-certificates cmake python \ + subversion ninja-build && \ rm -rf /var/lib/apt/lists/* +ADD checksums /tmp/checksums +ADD scripts /tmp/scripts + +# Arguments passed to build_install_clang.sh. +ARG buildscript_args + # Run the build. Results of the build will be available as /tmp/clang.tar.gz. -ADD scripts/build_install_llvm.sh /tmp -RUN /tmp/build_install_llvm.sh ${buildscript_args} +RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args} diff --git a/utils/docker/nvidia-cuda/release/Dockerfile b/utils/docker/nvidia-cuda/release/Dockerfile index b9bcae159780..a30d7d7e91ee 100644 --- a/utils/docker/nvidia-cuda/release/Dockerfile +++ b/utils/docker/nvidia-cuda/release/Dockerfile @@ -18,6 +18,6 @@ FROM nvidia/cuda:8.0-devel LABEL maintainer "LLVM Developers" # Unpack clang installation into this container. -ADD clang.tar.gz / +ADD clang.tar.gz /usr/local/ # C++ standard library and binutils are already included in the base package. diff --git a/utils/docker/scripts/build_install_llvm.sh b/utils/docker/scripts/build_install_llvm.sh index aef4e0cbca2c..79ce7e50efdc 100755 --- a/utils/docker/scripts/build_install_llvm.sh +++ b/utils/docker/scripts/build_install_llvm.sh @@ -11,7 +11,7 @@ set -e function show_usage() { - usage=$(cat << EOF + cat << EOF Usage: build_install_llvm.sh [options] -- [cmake-args] Checkout svn sources and run cmake with the specified arguments. Used @@ -37,8 +37,6 @@ Required options: At least one --install-target. All options after '--' are passed to CMake invocation. EOF -) - echo "$usage" } LLVM_SVN_REV="" @@ -48,6 +46,7 @@ CMAKE_INSTALL_TARGETS="" # We always checkout llvm LLVM_PROJECTS="llvm" CMAKE_LLVM_ENABLE_PROJECTS="" +CLANG_TOOLS_EXTRA_ENABLED=0 function contains_project() { local TARGET_PROJ="$1" @@ -60,6 +59,17 @@ function contains_project() { return 1 } +function append_project() { + local PROJ="$1" + + LLVM_PROJECTS="$LLVM_PROJECTS $PROJ" + if [ "$CMAKE_LLVM_ENABLE_PROJECTS" != "" ]; then + CMAKE_LLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS;$PROJ" + else + CMAKE_LLVM_ENABLE_PROJECTS="$PROJ" + fi +} + while [[ $# -gt 0 ]]; do case "$1" in -r|--revision) @@ -75,19 +85,27 @@ while [[ $# -gt 0 ]]; do -p|--llvm-project) shift PROJ="$1" + shift + if [ "$PROJ" == "cfe" ]; then PROJ="clang" fi - if ! contains_project "$PROJ" ; then - LLVM_PROJECTS="$LLVM_PROJECTS $PROJ" - if [ "$CMAKE_LLVM_ENABLE_PROJECTS" != "" ]; then - CMAKE_LLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS;" + + if [ "$PROJ" == "clang-tools-extra" ]; then + if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then + echo "Project 'clang-tools-extra' is already enabled, ignoring extra occurences." + else + CLANG_TOOLS_EXTRA_ENABLED=1 fi - CMAKE_LLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLED_PROJECTS$PROJ" + + continue + fi + + if ! contains_project "$PROJ" ; then + append_project "$PROJ" else echo "Project '$PROJ' is already enabled, ignoring extra occurences." fi - shift ;; -i|--install-target) shift @@ -114,14 +132,25 @@ if [ "$CMAKE_INSTALL_TARGETS" == "" ]; then exit 1 fi +if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then + if ! contains_project "clang"; then + echo "Project 'clang-tools-extra' was enabled without 'clang'." + echo "Adding 'clang' to a list of projects." 
+ + append_project "clang" + fi +fi + if [ "$LLVM_BRANCH" == "" ]; then LLVM_BRANCH="trunk" fi -if [ "$LLVM_SVN_REVISION" != "" ]; then - SVN_REV_ARG="-r$LLVM_SVN_REVISION" +if [ "$LLVM_SVN_REV" != "" ]; then + SVN_REV_ARG="-r$LLVM_SVN_REV" + echo "Checking out svn revision r$LLVM_SVN_REV." else SVN_REV_ARG="" + echo "Checking out latest svn revision." fi CLANG_BUILD_DIR=/tmp/clang-build @@ -140,14 +169,28 @@ for LLVM_PROJECT in $LLVM_PROJECTS; do fi echo "Checking out https://llvm.org/svn/llvm-project/$SVN_PROJECT to $CLANG_BUILD_DIR/src/$LLVM_PROJECT" - # FIXME: --trust-server-cert is required to workaround 'SSL issuer is not - # trusted' error. Using https seems preferable to http either way, - # albeit this is not secure. - svn co -q $SVN_REV_ARG --trust-server-cert \ + svn co -q $SVN_REV_ARG \ "https://llvm.org/svn/llvm-project/$SVN_PROJECT/$LLVM_BRANCH" \ "$CLANG_BUILD_DIR/src/$LLVM_PROJECT" done +if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then + echo "Checking out https://llvm.org/svn/llvm-project/clang-tools-extra to $CLANG_BUILD_DIR/src/clang/tools/extra" + svn co -q $SVN_REV_ARG \ + "https://llvm.org/svn/llvm-project/clang-tools-extra/$LLVM_BRANCH" \ + "$CLANG_BUILD_DIR/src/clang/tools/extra" +fi + +CHECKSUMS_FILE="/tmp/checksums/checksums.txt" + +if [ -f "$CHECKSUMS_FILE" ]; then + echo "Validating checksums for LLVM checkout..." + python "$(dirname $0)/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \ + --partial --multi_dir "$CLANG_BUILD_DIR/src" +else + echo "Skipping checksumming checks..." +fi + mkdir "$CLANG_BUILD_DIR/build" pushd "$CLANG_BUILD_DIR/build" diff --git a/utils/docker/scripts/llvm_checksum/llvm_checksum.py b/utils/docker/scripts/llvm_checksum/llvm_checksum.py new file mode 100755 index 000000000000..584efa2598bf --- /dev/null +++ b/utils/docker/scripts/llvm_checksum/llvm_checksum.py @@ -0,0 +1,198 @@ +#!/usr/bin/python +""" A small program to compute checksums of LLVM checkout. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import hashlib +import logging +import re +import sys +from argparse import ArgumentParser +from project_tree import * + +SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") + + +def main(): + parser = ArgumentParser() + parser.add_argument( + "-v", "--verbose", action="store_true", help="enable debug logging") + parser.add_argument( + "-c", + "--check", + metavar="reference_file", + help="read checksums from reference_file and " + + "check they match checksums of llvm_path.") + parser.add_argument( + "--partial", + action="store_true", + help="ignore projects from reference_file " + + "that are not checked out in llvm_path.") + parser.add_argument( + "--multi_dir", + action="store_true", + help="indicates llvm_path contains llvm, checked out " + + "into multiple directories, as opposed to a " + + "typical single source tree checkout.") + parser.add_argument("llvm_path") + + args = parser.parse_args() + if args.check is not None: + with open(args.check, "r") as f: + reference_checksums = ReadLLVMChecksums(f) + else: + reference_checksums = None + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + llvm_projects = CreateLLVMProjects(not args.multi_dir) + checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) + + if reference_checksums is None: + WriteLLVMChecksums(checksums, sys.stdout) + sys.exit(0) + + if not ValidateChecksums(reference_checksums, checksums, args.partial): + sys.stdout.write("Checksums differ.\nNew checksums:\n") + WriteLLVMChecksums(checksums, sys.stdout) + sys.stdout.write("Reference checksums:\n") + WriteLLVMChecksums(reference_checksums, sys.stdout) + sys.exit(1) + else: + sys.stdout.write("Checksums match.") + + +def ComputeLLVMChecksums(root_path, projects): + """Compute checksums for LLVM sources checked out using svn. + + Args: + root_path: a directory of llvm checkout. + projects: a list of LLVMProject instances, which describe checkout paths, + relative to root_path. + + Returns: + A dict mapping from project name to project checksum. + """ + hash_algo = hashlib.sha256 + + def collapse_svn_substitutions(contents): + # Replace svn substitutions for $Date$ and $LastChangedDate$. + # Unfortunately, these are locale-specific. + return SVN_DATES_REGEX.sub("$\1$", contents) + + def read_and_collapse_svn_subsitutions(file_path): + with open(file_path, "rb") as f: + contents = f.read() + new_contents = collapse_svn_substitutions(contents) + if contents != new_contents: + logging.debug("Replaced svn keyword substitutions in %s", file_path) + logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) + return new_contents + + project_checksums = dict() + # Hash each project. 
+ for proj in projects: + project_root = os.path.join(root_path, proj.relpath) + if not os.path.exists(project_root): + logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath, + proj.name) + continue + + files = list() + + def add_file_hash(file_path): + if os.path.islink(file_path) and not os.path.exists(file_path): + content = os.readlink(file_path) + else: + content = read_and_collapse_svn_subsitutions(file_path) + hasher = hash_algo() + hasher.update(content) + file_digest = hasher.hexdigest() + logging.debug("Checksum %s for file %s", file_digest, file_path) + files.append((file_path, file_digest)) + + logging.info("Computing checksum for %s", proj.name) + WalkProjectFiles(root_path, projects, proj, add_file_hash) + + # Compute final checksum. + files.sort(key=lambda x: x[0]) + hasher = hash_algo() + for file_path, file_digest in files: + file_path = os.path.relpath(file_path, project_root) + hasher.update(file_path) + hasher.update(file_digest) + project_checksums[proj.name] = hasher.hexdigest() + return project_checksums + + +def WriteLLVMChecksums(checksums, f): + """Writes checksums to a text file. + + Args: + checksums: a dict mapping from project name to project checksum (result of + ComputeLLVMChecksums). + f: a file object to write into. + """ + + for proj in sorted(checksums.keys()): + f.write("{} {}\n".format(checksums[proj], proj)) + + +def ReadLLVMChecksums(f): + """Reads checksums from a text file, produced by WriteLLVMChecksums. + + Returns: + A dict, mapping from project name to project checksum. + """ + checksums = {} + while True: + line = f.readline() + if line == "": + break + checksum, proj = line.split() + checksums[proj] = checksum + return checksums + + +def ValidateChecksums(reference_checksums, + new_checksums, + allow_missing_projects=False): + """Validates that reference_checksums and new_checksums match. + + Args: + reference_checksums: a dict of reference checksums, mapping from a project + name to a project checksum. + new_checksums: a dict of checksums to be checked, mapping from a project + name to a project checksum. + allow_missing_projects: + When True, reference_checksums may contain more projects than + new_checksums. Projects missing from new_checksums are ignored. + When False, new_checksums and reference_checksums must contain checksums + for the same set of projects. If there is a project in + reference_checksums, missing from new_checksums, ValidateChecksums + will return False. + + Returns: + True, if checksums match with regards to allow_missing_projects flag value. + False, otherwise. + """ + if not allow_missing_projects: + if len(new_checksums) != len(reference_checksums): + return False + + for proj, checksum in new_checksums.iteritems(): + # We never computed a checksum for this project. + if proj not in reference_checksums: + return False + # Checksum did not match. + if reference_checksums[proj] != checksum: + return False + + return True + + +if __name__ == "__main__": + main() diff --git a/utils/docker/scripts/llvm_checksum/project_tree.py b/utils/docker/scripts/llvm_checksum/project_tree.py new file mode 100644 index 000000000000..31d8703ba234 --- /dev/null +++ b/utils/docker/scripts/llvm_checksum/project_tree.py @@ -0,0 +1,95 @@ +"""Contains helper functions to compute checksums for LLVM checkouts. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os +import os.path +import sys + + +class LLVMProject(object): + """An LLVM project with a descriptive name and a relative checkout path. + """ + + def __init__(self, name, relpath): + self.name = name + self.relpath = relpath + + def is_subproject(self, other_project): + """ Check if self is checked out as a subdirectory of other_project. + """ + return self.relpath.startswith(other_project.relpath) + + +def WalkProjectFiles(checkout_root, all_projects, project, visitor): + """ Walk over all files inside a project without recursing into subprojects, '.git' and '.svn' subfolders. + + checkout_root: root of the LLVM checkout. + all_projects: projects in the LLVM checkout. + project: a project to walk the files of. Must be inside all_projects. + visitor: a function called on each visited file. + """ + assert project in all_projects + + ignored_paths = set() + for other_project in all_projects: + if other_project != project and other_project.is_subproject(project): + ignored_paths.add(os.path.join(checkout_root, other_project.relpath)) + + def raise_error(err): + raise err + + project_root = os.path.join(checkout_root, project.relpath) + for root, dirs, files in os.walk(project_root, onerror=raise_error): + dirs[:] = [ + d for d in dirs + if d != ".svn" and d != ".git" and + os.path.join(root, d) not in ignored_paths + ] + for f in files: + visitor(os.path.join(root, f)) + + +def CreateLLVMProjects(single_tree_checkout): + """Returns a list of LLVMProject instances, describing relative paths of a typical LLVM checkout. + + Args: + single_tree_checkout: + When True, relative paths for each project points to a typical single + source tree checkout. + When False, relative paths for each projects points to a separate + directory. However, clang-tools-extra is an exception, its relative path + will always be 'clang/tools/extra'. + """ + # FIXME: cover all of llvm projects. + + # Projects that reside inside 'projects/' in a single source tree checkout. + ORDINARY_PROJECTS = [ + "compiler-rt", "dragonegg", "libcxx", "libcxxabi", "libunwind", + "parallel-libs", "test-suite" + ] + # Projects that reside inside 'tools/' in a single source tree checkout. 
+ TOOLS_PROJECTS = ["clang", "lld", "lldb", "llgo"] + + if single_tree_checkout: + projects = [LLVMProject("llvm", "")] + projects += [ + LLVMProject(p, os.path.join("projects", p)) for p in ORDINARY_PROJECTS + ] + projects += [ + LLVMProject(p, os.path.join("tools", p)) for p in TOOLS_PROJECTS + ] + projects.append( + LLVMProject("clang-tools-extra", + os.path.join("tools", "clang", "tools", "extra"))) + else: + projects = [LLVMProject("llvm", "llvm")] + projects += [LLVMProject(p, p) for p in ORDINARY_PROJECTS] + projects += [LLVMProject(p, p) for p in TOOLS_PROJECTS] + projects.append( + LLVMProject("clang-tools-extra", os.path.join("clang", "tools", + "extra"))) + return projects diff --git a/utils/git-svn/git-llvm b/utils/git-svn/git-llvm index 70b63f199494..0d566dac430a 100755 --- a/utils/git-svn/git-llvm +++ b/utils/git-svn/git-llvm @@ -107,7 +107,7 @@ def shell(cmd, strip=True, cwd=None, stdin=None, die_on_failure=True, start = time.time() p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=err_pipe, - stdin=subprocess.PIPE) + stdin=subprocess.PIPE, universal_newlines=True) stdout, stderr = p.communicate(input=stdin) elapsed = time.time() - start diff --git a/utils/lit/CMakeLists.txt b/utils/lit/CMakeLists.txt new file mode 100644 index 000000000000..43caf09f1402 --- /dev/null +++ b/utils/lit/CMakeLists.txt @@ -0,0 +1,32 @@ +# The configured file is not placed in the correct location +# until the tests are run as we need to copy it into +# a copy of the tests folder +configure_lit_site_cfg( + "${CMAKE_CURRENT_SOURCE_DIR}/tests/lit.site.cfg.in" + "${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg" + OUTPUT_MAPPING + "${CMAKE_CURRENT_BINARY_DIR}/tests/lit.site.cfg" + ) + +# Lit's test suite creates output files next to the sources which makes the +# source tree dirty. This is undesirable because we do out of source builds. +# To work around this the tests and the configuration file are copied into the +# build directory just before running them. The tests are not copied over at +# configure time (i.e. `file(COPY ...)`) because this could lead to stale +# tests being run. 
+add_custom_target(prepare-check-lit + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/tests" + COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}/tests" "${CMAKE_CURRENT_BINARY_DIR}/tests" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg" "${CMAKE_CURRENT_BINARY_DIR}/tests" + COMMENT "Preparing lit tests" +) + +# Add rules for lit's own test suite +add_lit_testsuite(check-lit "Running lit's tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "FileCheck" "not" "prepare-check-lit" +) + +# For IDEs +set_target_properties(check-lit PROPERTIES FOLDER "Tests") +set_target_properties(prepare-check-lit PROPERTIES FOLDER "Tests") diff --git a/utils/lit/lit.py b/utils/lit/lit.py index 2c5ecfe1d40f..af070c69b5bd 100755 --- a/utils/lit/lit.py +++ b/utils/lit/lit.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import sys from lit.main import main diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py index 3351ebed54bd..389e5652e9be 100644 --- a/utils/lit/lit/LitConfig.py +++ b/utils/lit/lit/LitConfig.py @@ -21,7 +21,7 @@ class LitConfig(object): def __init__(self, progname, path, quiet, useValgrind, valgrindLeakCheck, valgrindArgs, - noExecute, debug, isWindows, + noExecute, debug, isWindows, singleProcess, params, config_prefix = None, maxIndividualTestTime = 0, maxFailures = None, @@ -37,15 +37,17 @@ class LitConfig(object): self.valgrindUserArgs = list(valgrindArgs) self.noExecute = noExecute self.debug = debug + self.singleProcess = singleProcess self.isWindows = bool(isWindows) self.params = dict(params) self.bashPath = None # Configuration files to look for when discovering test suites. self.config_prefix = config_prefix or 'lit' - self.config_name = '%s.cfg' % (self.config_prefix,) - self.site_config_name = '%s.site.cfg' % (self.config_prefix,) - self.local_config_name = '%s.local.cfg' % (self.config_prefix,) + self.suffixes = ['cfg.py', 'cfg'] + self.config_names = ['%s.%s' % (self.config_prefix,x) for x in self.suffixes] + self.site_config_names = ['%s.site.%s' % (self.config_prefix,x) for x in self.suffixes] + self.local_config_names = ['%s.local.%s' % (self.config_prefix,x) for x in self.suffixes] self.numErrors = 0 self.numWarnings = 0 diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index a60a0f854870..a49e1536860d 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -1,7 +1,14 @@ from __future__ import absolute_import +import difflib +import errno +import functools +import itertools +import getopt import os, signal, subprocess, sys import re +import stat import platform +import shutil import tempfile import threading @@ -217,7 +224,20 @@ def quote_windows_command(seq): # cmd is export or env def updateEnv(env, cmd): arg_idx = 1 + unset_next_env_var = False for arg_idx, arg in enumerate(cmd.args[1:]): + # Support for the -u flag (unsetting) for env command + # e.g., env -u FOO -u BAR will remove both FOO and BAR + # from the environment. + if arg == '-u': + unset_next_env_var = True + continue + if unset_next_env_var: + unset_next_env_var = False + if arg in env.env: + del env.env[arg] + continue + # Partition the string into KEY=VALUE. key, eq, val = arg.partition('=') # Stop if there was no equals. @@ -238,6 +258,7 @@ def executeBuiltinEcho(cmd, shenv): # Some tests have un-redirected echo commands to help debug test failures. # Buffer our output and return it to the caller. 
is_redirected = True + encode = lambda x : x if stdout == subprocess.PIPE: is_redirected = False stdout = StringIO() @@ -245,6 +266,9 @@ def executeBuiltinEcho(cmd, shenv): # Reopen stdout in binary mode to avoid CRLF translation. The versions # of echo we are replacing on Windows all emit plain LF, and the LLVM # tests now depend on this. + # When we open as binary, however, this also means that we have to write + # 'bytes' objects to stdout instead of 'str' objects. + encode = lit.util.to_bytes stdout = open(stdout.name, stdout.mode + 'b') opened_files.append((None, None, stdout, None)) @@ -265,17 +289,18 @@ def executeBuiltinEcho(cmd, shenv): def maybeUnescape(arg): if not interpret_escapes: return arg - # Python string escapes and "echo" escapes are obviously different, but - # this should be enough for the LLVM test suite. - return arg.decode('string_escape') + + arg = lit.util.to_bytes(arg) + codec = 'string_escape' if sys.version_info < (3,0) else 'unicode_escape' + return arg.decode(codec) if args: for arg in args[:-1]: - stdout.write(maybeUnescape(arg)) - stdout.write(' ') - stdout.write(maybeUnescape(args[-1])) + stdout.write(encode(maybeUnescape(arg))) + stdout.write(encode(' ')) + stdout.write(encode(maybeUnescape(args[-1]))) if write_newline: - stdout.write('\n') + stdout.write(encode('\n')) for (name, mode, f, path) in opened_files: f.close() @@ -284,6 +309,152 @@ def executeBuiltinEcho(cmd, shenv): return stdout.getvalue() return "" +def executeBuiltinMkdir(cmd, cmd_shenv): + """executeBuiltinMkdir - Create new directories.""" + args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] + try: + opts, args = getopt.gnu_getopt(args, 'p') + except getopt.GetoptError as err: + raise InternalShellError(cmd, "Unsupported: 'mkdir': %s" % str(err)) + + parent = False + for o, a in opts: + if o == "-p": + parent = True + else: + assert False, "unhandled option" + + if len(args) == 0: + raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand") + + stderr = StringIO() + exitCode = 0 + for dir in args: + if not os.path.isabs(dir): + dir = os.path.realpath(os.path.join(cmd_shenv.cwd, dir)) + if parent: + lit.util.mkdir_p(dir) + else: + try: + os.mkdir(dir) + except OSError as err: + stderr.write("Error: 'mkdir' command failed, %s\n" % str(err)) + exitCode = 1 + return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) + +def executeBuiltinDiff(cmd, cmd_shenv): + """executeBuiltinDiff - Compare files line by line.""" + args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] + try: + opts, args = getopt.gnu_getopt(args, "wbu", ["strip-trailing-cr"]) + except getopt.GetoptError as err: + raise InternalShellError(cmd, "Unsupported: 'diff': %s" % str(err)) + + filelines, filepaths = ([] for i in range(2)) + ignore_all_space = False + ignore_space_change = False + unified_diff = False + strip_trailing_cr = False + for o, a in opts: + if o == "-w": + ignore_all_space = True + elif o == "-b": + ignore_space_change = True + elif o == "-u": + unified_diff = True + elif o == "--strip-trailing-cr": + strip_trailing_cr = True + else: + assert False, "unhandled option" + + if len(args) != 2: + raise InternalShellError(cmd, "Error: missing or extra operand") + + stderr = StringIO() + stdout = StringIO() + exitCode = 0 + try: + for file in args: + if not os.path.isabs(file): + file = os.path.realpath(os.path.join(cmd_shenv.cwd, file)) + filepaths.append(file) + with open(file, 'r') as f: + filelines.append(f.readlines()) + + def compose2(f, g): + return lambda x: f(g(x)) + 
+ f = lambda x: x + if strip_trailing_cr: + f = compose2(lambda line: line.rstrip('\r'), f) + if ignore_all_space or ignore_space_change: + ignoreSpace = lambda line, separator: separator.join(line.split()) + ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if ignore_all_space else ' ') + f = compose2(ignoreAllSpaceOrSpaceChange, f) + + for idx, lines in enumerate(filelines): + filelines[idx]= [f(line) for line in lines] + + func = difflib.unified_diff if unified_diff else difflib.context_diff + for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]): + stdout.write(diff) + exitCode = 1 + except IOError as err: + stderr.write("Error: 'diff' command failed, %s\n" % str(err)) + exitCode = 1 + + return ShellCommandResult(cmd, stdout.getvalue(), stderr.getvalue(), exitCode, False) + +def executeBuiltinRm(cmd, cmd_shenv): + """executeBuiltinRm - Removes (deletes) files or directories.""" + args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] + try: + opts, args = getopt.gnu_getopt(args, "frR", ["--recursive"]) + except getopt.GetoptError as err: + raise InternalShellError(cmd, "Unsupported: 'rm': %s" % str(err)) + + force = False + recursive = False + for o, a in opts: + if o == "-f": + force = True + elif o in ("-r", "-R", "--recursive"): + recursive = True + else: + assert False, "unhandled option" + + if len(args) == 0: + raise InternalShellError(cmd, "Error: 'rm' is missing an operand") + + def on_rm_error(func, path, exc_info): + # path contains the path of the file that couldn't be removed + # let's just assume that it's read-only and remove it. + os.chmod(path, stat.S_IMODE( os.stat(path).st_mode) | stat.S_IWRITE) + os.remove(path) + + stderr = StringIO() + exitCode = 0 + for path in args: + if not os.path.isabs(path): + path = os.path.realpath(os.path.join(cmd_shenv.cwd, path)) + if force and not os.path.exists(path): + continue + try: + if os.path.isdir(path): + if not recursive: + stderr.write("Error: %s is a directory\n" % path) + exitCode = 1 + shutil.rmtree(path, onerror = on_rm_error if force else None) + else: + if force and not os.access(path, os.W_OK): + os.chmod(path, + stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE) + os.remove(path) + except OSError as err: + stderr.write("Error: 'rm' command failed, %s" % str(err)) + exitCode = 1 + return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) + def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): """Return the standard fds for cmd after applying redirects @@ -442,6 +613,30 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): updateEnv(shenv, cmd.commands[0]) return 0 + if cmd.commands[0].args[0] == 'mkdir': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: 'mkdir' " + "cannot be part of a pipeline") + cmdResult = executeBuiltinMkdir(cmd.commands[0], shenv) + results.append(cmdResult) + return cmdResult.exitCode + + if cmd.commands[0].args[0] == 'diff': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: 'diff' " + "cannot be part of a pipeline") + cmdResult = executeBuiltinDiff(cmd.commands[0], shenv) + results.append(cmdResult) + return cmdResult.exitCode + + if cmd.commands[0].args[0] == 'rm': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' " + "cannot be part of a pipeline") + cmdResult = executeBuiltinRm(cmd.commands[0], shenv) + results.append(cmdResult) + return cmdResult.exitCode + procs = [] default_stdin = subprocess.PIPE 
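The three builtins added above (mkdir, diff and rm) are handled here before any external process is spawned, and each must be the only command in its pipeline. The diff builtin is essentially a thin wrapper around difflib; a standalone sketch of the same comparison (file names are placeholders):

    import difflib

    def simple_diff(path_a, path_b, unified=True):
        # Compare two text files; return (diff text, exit code), where the
        # exit code is 1 when the files differ, matching the builtin.
        with open(path_a) as a, open(path_b) as b:
            lines_a, lines_b = a.readlines(), b.readlines()
        func = difflib.unified_diff if unified else difflib.context_diff
        out = list(func(lines_a, lines_b, path_a, path_b))
        return ''.join(out), (1 if out else 0)

    # e.g. text, rc = simple_diff('expected.txt', 'actual.txt')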
stderrTempFiles = [] @@ -711,6 +906,7 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): mode += 'b' # Avoid CRLFs when writing bash scripts. f = open(script, mode) if isWin32CMDEXE: + f.write('@echo off\n') f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands)) else: if test.config.pipefail: @@ -787,9 +983,13 @@ def parseIntegratedTestScriptCommands(source_path, keywords): # command. Note that we take care to return regular strings in # Python 2, to avoid other code having to differentiate between the # str and unicode types. + # + # Opening the file in binary mode prevented Windows \r newline + # characters from being converted to Unix \n newlines, so manually + # strip those from the yielded lines. keyword,ln = match.groups() yield (line_number, to_string(keyword.decode('utf-8')), - to_string(ln.decode('utf-8'))) + to_string(ln.decode('utf-8').rstrip('\r'))) finally: f.close() @@ -802,6 +1002,13 @@ def getTempPaths(test): tmpBase = os.path.join(tmpDir, execbase) return tmpDir, tmpBase +def colonNormalizePath(path): + if kIsWindows: + return re.sub(r'^(.):', r'\1', path.replace('\\', '/')) + else: + assert path[0] == '/' + return path[1:] + def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): sourcepath = test.getSourcePath() sourcedir = os.path.dirname(sourcepath) @@ -837,23 +1044,15 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): ('%/T', tmpDir.replace('\\', '/')), ]) - # "%:[STpst]" are paths without colons. - if kIsWindows: - substitutions.extend([ - ('%:s', re.sub(r'^(.):', r'\1', sourcepath)), - ('%:S', re.sub(r'^(.):', r'\1', sourcedir)), - ('%:p', re.sub(r'^(.):', r'\1', sourcedir)), - ('%:t', re.sub(r'^(.):', r'\1', tmpBase) + '.tmp'), - ('%:T', re.sub(r'^(.):', r'\1', tmpDir)), - ]) - else: - substitutions.extend([ - ('%:s', sourcepath), - ('%:S', sourcedir), - ('%:p', sourcedir), - ('%:t', tmpBase + '.tmp'), - ('%:T', tmpDir), - ]) + # "%:[STpst]" are normalized paths without colons and without a leading + # slash. 
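The colonNormalizePath helper introduced above replaces the two per-platform branches that previously built these substitutions: on Windows it flips backslashes to forward slashes and drops the drive colon, elsewhere it strips the leading slash. For illustration only (made-up paths, each call valid only on its own platform):

    print(colonNormalizePath(r'C:\temp\out.tmp'))    # Windows -> 'C/temp/out.tmp'
    print(colonNormalizePath('/home/user/out.tmp'))  # POSIX   -> 'home/user/out.tmp'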
+ substitutions.extend([ + ('%:s', colonNormalizePath(sourcepath)), + ('%:S', colonNormalizePath(sourcedir)), + ('%:p', colonNormalizePath(sourcedir)), + ('%:t', colonNormalizePath(tmpBase + '.tmp')), + ('%:T', colonNormalizePath(tmpDir)), + ]) return substitutions def applySubstitutions(script, substitutions): diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py index 4befe582d454..398b6cecd5a5 100644 --- a/utils/lit/lit/discovery.py +++ b/utils/lit/lit/discovery.py @@ -10,13 +10,18 @@ import lit.run from lit.TestingConfig import TestingConfig from lit import LitConfig, Test +def chooseConfigFileFromDir(dir, config_names): + for name in config_names: + p = os.path.join(dir, name) + if os.path.exists(p): + return p + return None + def dirContainsTestSuite(path, lit_config): - cfgpath = os.path.join(path, lit_config.site_config_name) - if os.path.exists(cfgpath): - return cfgpath - cfgpath = os.path.join(path, lit_config.config_name) - if os.path.exists(cfgpath): - return cfgpath + cfgpath = chooseConfigFileFromDir(path, lit_config.site_config_names) + if not cfgpath: + cfgpath = chooseConfigFileFromDir(path, lit_config.config_names) + return cfgpath def getTestSuite(item, litConfig, cache): """getTestSuite(item, litConfig, cache) -> (suite, relative_path) @@ -40,6 +45,20 @@ def getTestSuite(item, litConfig, cache): ts, relative = search(parent) return (ts, relative + (base,)) + # This is a private builtin parameter which can be used to perform + # translation of configuration paths. Specifically, this parameter + # can be set to a dictionary that the discovery process will consult + # when it finds a configuration it is about to load. If the given + # path is in the map, the value of that key is a path to the + # configuration to load instead. + config_map = litConfig.params.get('config_map') + if config_map: + cfgpath = os.path.realpath(cfgpath) + cfgpath = os.path.normcase(cfgpath) + target = config_map.get(cfgpath) + if target: + cfgpath = target + # We found a test suite, create a new config for it and load it. if litConfig.debug: litConfig.note('loading suite config %r' % cfgpath) @@ -52,13 +71,14 @@ def getTestSuite(item, litConfig, cache): def search(path): # Check for an already instantiated test suite. - res = cache.get(path) + real_path = os.path.realpath(path) + res = cache.get(real_path) if res is None: - cache[path] = res = search1(path) + cache[real_path] = res = search1(path) return res # Canonicalize the path. - item = os.path.realpath(item) + item = os.path.normpath(os.path.join(os.getcwd(), item)) # Skip files and virtual components. components = [] @@ -83,10 +103,10 @@ def getLocalConfig(ts, path_in_suite, litConfig, cache): # Check if there is a local configuration file. source_path = ts.getSourcePath(path_in_suite) - cfgpath = os.path.join(source_path, litConfig.local_config_name) + cfgpath = chooseConfigFileFromDir(source_path, litConfig.local_config_names) # If not, just reuse the parent config. - if not os.path.exists(cfgpath): + if not cfgpath: return parent # Otherwise, copy the current config and load the local configuration @@ -211,7 +231,7 @@ def find_tests_for_inputs(lit_config, inputs): f.close() else: actual_inputs.append(input) - + # Load the tests from the inputs. 
tests = [] test_suite_cache = {} @@ -242,6 +262,7 @@ def load_test_suite(inputs): useValgrind = False, valgrindLeakCheck = False, valgrindArgs = [], + singleProcess=False, noExecute = False, debug = False, isWindows = (platform.system()=='Windows'), diff --git a/utils/lit/lit/formats/googletest.py b/utils/lit/lit/formats/googletest.py index 9c55e71d2330..6696fabc4f5e 100644 --- a/utils/lit/lit/formats/googletest.py +++ b/utils/lit/lit/formats/googletest.py @@ -13,11 +13,14 @@ kIsWindows = sys.platform in ['win32', 'cygwin'] class GoogleTest(TestFormat): def __init__(self, test_sub_dirs, test_suffix): self.test_sub_dirs = os.path.normcase(str(test_sub_dirs)).split(';') - self.test_suffix = str(test_suffix) # On Windows, assume tests will also end in '.exe'. + exe_suffix = str(test_suffix) if kIsWindows: - self.test_suffix += '.exe' + exe_suffix += '.exe' + + # Also check for .py files for testing purposes. + self.test_suffixes = {exe_suffix, test_suffix + '.py'} def getGTestTests(self, path, litConfig, localConfig): """getGTestTests(path) - [name] @@ -29,8 +32,10 @@ class GoogleTest(TestFormat): litConfig: LitConfig instance localConfig: TestingConfig instance""" + list_test_cmd = self.maybeAddPythonToCmd([path, '--gtest_list_tests']) + try: - output = subprocess.check_output([path, '--gtest_list_tests'], + output = subprocess.check_output(list_test_cmd, env=localConfig.environment) except subprocess.CalledProcessError as exc: litConfig.warning( @@ -82,7 +87,7 @@ class GoogleTest(TestFormat): if not os.path.isdir(dir_path): continue for fn in lit.util.listdir_files(dir_path, - suffixes={self.test_suffix}): + suffixes=self.test_suffixes): # Discover the tests in this executable. execpath = os.path.join(source_path, subdir, fn) testnames = self.getGTestTests(execpath, litConfig, localConfig) @@ -100,6 +105,7 @@ class GoogleTest(TestFormat): testName = namePrefix + '/' + testName cmd = [testPath, '--gtest_filter=' + testName] + cmd = self.maybeAddPythonToCmd(cmd) if litConfig.useValgrind: cmd = litConfig.valgrindArgs + cmd @@ -126,3 +132,14 @@ class GoogleTest(TestFormat): return lit.Test.UNRESOLVED, msg return lit.Test.PASS,'' + + def maybeAddPythonToCmd(self, cmd): + """Insert the python exe into the command if cmd[0] ends in .py + + We cannot rely on the system to interpret shebang lines for us on + Windows, so add the python executable to the command if this is a .py + script. 
+ """ + if cmd[0].endswith('.py'): + return [sys.executable] + cmd + return cmd diff --git a/utils/lit/lit/llvm/__init__.py b/utils/lit/lit/llvm/__init__.py new file mode 100644 index 000000000000..7a46daf24710 --- /dev/null +++ b/utils/lit/lit/llvm/__init__.py @@ -0,0 +1,9 @@ +from lit.llvm import config + +llvm_config = None + + +def initialize(lit_config, test_config): + global llvm_config + + llvm_config = config.LLVMConfig(lit_config, test_config) diff --git a/utils/lit/lit/llvm/config.py b/utils/lit/lit/llvm/config.py new file mode 100644 index 000000000000..554da93f110b --- /dev/null +++ b/utils/lit/lit/llvm/config.py @@ -0,0 +1,473 @@ +import os +import platform +import re +import subprocess +import sys + +import lit.util +from lit.llvm.subst import FindTool +from lit.llvm.subst import ToolSubst + + +def binary_feature(on, feature, off_prefix): + return feature if on else off_prefix + feature + + +class LLVMConfig(object): + + def __init__(self, lit_config, config): + self.lit_config = lit_config + self.config = config + + features = config.available_features + + self.use_lit_shell = False + # Tweak PATH for Win32 to decide to use bash.exe or not. + if sys.platform == 'win32': + # For tests that require Windows to run. + features.add('system-windows') + + # Seek sane tools in directories and set to $PATH. + path = self.lit_config.getToolsPath(config.lit_tools_dir, + config.environment['PATH'], + ['cmp.exe', 'grep.exe', 'sed.exe']) + if path is not None: + self.with_environment('PATH', path, append_path=True) + self.use_lit_shell = True + + # Choose between lit's internal shell pipeline runner and a real shell. If + # LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override. + lit_shell_env = os.environ.get('LIT_USE_INTERNAL_SHELL') + if lit_shell_env: + self.use_lit_shell = lit.util.pythonize_bool(lit_shell_env) + + if not self.use_lit_shell: + features.add('shell') + + # Running on Darwin OS + if platform.system() == 'Darwin': + # FIXME: lld uses the first, other projects use the second. + # We should standardize on the former. + features.add('system-linker-mach-o') + features.add('system-darwin') + elif platform.system() == 'Windows': + # For tests that require Windows to run. + features.add('system-windows') + elif platform.system() == "Linux": + features.add('system-linux') + + # Native compilation: host arch == default triple arch + # Both of these values should probably be in every site config (e.g. as + # part of the standard header. But currently they aren't) + host_triple = getattr(config, 'host_triple', None) + target_triple = getattr(config, 'target_triple', None) + if host_triple and host_triple == target_triple: + features.add('native') + + # Sanitizers. + sanitizers = getattr(config, 'llvm_use_sanitizer', '') + sanitizers = frozenset(x.lower() for x in sanitizers.split(';')) + features.add(binary_feature('address' in sanitizers, 'asan', 'not_')) + features.add(binary_feature('memory' in sanitizers, 'msan', 'not_')) + features.add(binary_feature( + 'undefined' in sanitizers, 'ubsan', 'not_')) + + have_zlib = getattr(config, 'have_zlib', None) + features.add(binary_feature(have_zlib, 'zlib', 'no')) + + # Check if we should run long running tests. 
+ long_tests = lit_config.params.get('run_long_tests', None) + if lit.util.pythonize_bool(long_tests): + features.add('long_tests') + + if target_triple: + if re.match(r'^x86_64.*-apple', target_triple): + host_cxx = getattr(config, 'host_cxx', None) + if 'address' in sanitizers and self.get_clang_has_lsan(host_cxx, target_triple): + self.with_environment( + 'ASAN_OPTIONS', 'detect_leaks=1', append_path=True) + if re.match(r'^x86_64.*-linux', target_triple): + features.add('x86_64-linux') + if re.match(r'.*-win32$', target_triple): + features.add('target-windows') + + use_gmalloc = lit_config.params.get('use_gmalloc', None) + if lit.util.pythonize_bool(use_gmalloc): + # Allow use of an explicit path for gmalloc library. + # Will default to '/usr/lib/libgmalloc.dylib' if not set. + gmalloc_path_str = lit_config.params.get('gmalloc_path', + '/usr/lib/libgmalloc.dylib') + if gmalloc_path_str is not None: + self.with_environment( + 'DYLD_INSERT_LIBRARIES', gmalloc_path_str) + + breaking_checks = getattr(config, 'enable_abi_breaking_checks', None) + if lit.util.pythonize_bool(breaking_checks): + features.add('abi-breaking-checks') + + def with_environment(self, variable, value, append_path=False): + if append_path: + # For paths, we should be able to take a list of them and process all + # of them. + paths_to_add = value + if lit.util.is_string(paths_to_add): + paths_to_add = [paths_to_add] + + def norm(x): + return os.path.normcase(os.path.normpath(x)) + + current_paths = self.config.environment.get(variable, None) + if current_paths: + current_paths = current_paths.split(os.path.pathsep) + paths = [norm(p) for p in current_paths] + else: + paths = [] + + # If we are passed a list [a b c], then iterating this list forwards + # and adding each to the beginning would result in b c a. So we + # need to iterate in reverse to end up with the original ordering. + for p in reversed(paths_to_add): + # Move it to the front if it already exists, otherwise insert it at the + # beginning. + p = norm(p) + try: + paths.remove(p) + except ValueError: + pass + paths = [p] + paths + value = os.pathsep.join(paths) + self.config.environment[variable] = value + + def with_system_environment(self, variables, append_path=False): + if lit.util.is_string(variables): + variables = [variables] + for v in variables: + value = os.environ.get(v) + if value: + self.with_environment(v, value, append_path) + + def clear_environment(self, variables): + for name in variables: + if name in self.config.environment: + del self.config.environment[name] + + def get_process_output(self, command): + try: + cmd = subprocess.Popen( + command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, env=self.config.environment) + stdout, stderr = cmd.communicate() + stdout = lit.util.to_string(stdout) + stderr = lit.util.to_string(stderr) + return (stdout, stderr) + except OSError: + self.lit_config.fatal('Could not run process %s' % command) + + def feature_config(self, features): + # Ask llvm-config about the specified feature. + arguments = [x for (x, _) in features] + config_path = os.path.join(self.config.llvm_tools_dir, 'llvm-config') + + output, _ = self.get_process_output([config_path] + arguments) + lines = output.split('\n') + + for (feature_line, (_, patterns)) in zip(lines, features): + # We should have either a callable or a dictionary. If it's a + # dictionary, grep each key against the output and use the value if + # it matches. If it's a callable, it does the entire translation. 
+ if callable(patterns): + features_to_add = patterns(feature_line) + self.config.available_features.update(features_to_add) + else: + for (re_pattern, feature) in patterns.items(): + if re.search(re_pattern, feature_line): + self.config.available_features.add(feature) + + # Note that when substituting %clang_cc1 also fill in the include directory of + # the builtin headers. Those are part of even a freestanding environment, but + # Clang relies on the driver to locate them. + def get_clang_builtin_include_dir(self, clang): + # FIXME: Rather than just getting the version, we should have clang print + # out its resource dir here in an easy to scrape form. + clang_dir, _ = self.get_process_output( + [clang, '-print-file-name=include']) + + if not clang_dir: + self.lit_config.fatal( + "Couldn't find the include dir for Clang ('%s')" % clang) + + clang_dir = clang_dir.strip() + if sys.platform in ['win32'] and not self.use_lit_shell: + # Don't pass dosish path separator to msys bash.exe. + clang_dir = clang_dir.replace('\\', '/') + # Ensure the result is an ascii string, across Python2.5+ - Python3. + return clang_dir + + # On macOS, LSan is only supported on clang versions 5 and higher + def get_clang_has_lsan(self, clang, triple): + if not clang: + self.lit_config.warning( + 'config.host_cxx is unset but test suite is configured to use sanitizers.') + return False + + clang_binary = clang.split()[0] + version_string, _ = self.get_process_output( + [clang_binary, '--version']) + if not 'clang' in version_string: + self.lit_config.warning( + "compiler '%s' does not appear to be clang, " % clang_binary + + 'but test suite is configured to use sanitizers.') + return False + + if re.match(r'.*-linux', triple): + return True + + if re.match(r'^x86_64.*-apple', triple): + version_regex = re.search(r'version ([0-9]+)\.([0-9]+).([0-9]+)', version_string) + major_version_number = int(version_regex.group(1)) + minor_version_number = int(version_regex.group(2)) + patch_version_number = int(version_regex.group(3)) + if 'Apple LLVM' in version_string: + # Apple LLVM doesn't yet support LSan + return False + else: + return major_version_number >= 5 + + return False + + def make_itanium_abi_triple(self, triple): + m = re.match(r'(\w+)-(\w+)-(\w+)', triple) + if not m: + self.lit_config.fatal( + "Could not turn '%s' into Itanium ABI triple" % triple) + if m.group(3).lower() != 'win32': + # All non-win32 triples use the Itanium ABI. + return triple + return m.group(1) + '-' + m.group(2) + '-mingw32' + + def make_msabi_triple(self, triple): + m = re.match(r'(\w+)-(\w+)-(\w+)', triple) + if not m: + self.lit_config.fatal( + "Could not turn '%s' into MS ABI triple" % triple) + isa = m.group(1).lower() + vendor = m.group(2).lower() + os = m.group(3).lower() + if os == 'win32': + # If the OS is win32, we're done. + return triple + if isa.startswith('x86') or isa == 'amd64' or re.match(r'i\d86', isa): + # For x86 ISAs, adjust the OS. + return isa + '-' + vendor + '-win32' + # -win32 is not supported for non-x86 targets; use a default. 
+ return 'i686-pc-win32' + + def add_tool_substitutions(self, tools, search_dirs=None): + if not search_dirs: + search_dirs = [self.config.llvm_tools_dir] + + if lit.util.is_string(search_dirs): + search_dirs = [search_dirs] + + tools = [x if isinstance(x, ToolSubst) else ToolSubst(x) + for x in tools] + + search_dirs = os.pathsep.join(search_dirs) + substitutions = [] + + for tool in tools: + match = tool.resolve(self, search_dirs) + + # Either no match occurred, or there was an unresolved match that + # is ignored. + if not match: + continue + + subst_key, tool_pipe, command = match + + # An unresolved match occurred that can't be ignored. Fail without + # adding any of the previously-discovered substitutions. + if not command: + return False + + substitutions.append((subst_key, tool_pipe + command)) + + self.config.substitutions.extend(substitutions) + return True + + def use_default_substitutions(self): + tool_patterns = [ + ToolSubst('FileCheck', unresolved='fatal'), + # Handle these specially as they are strings searched for during testing. + ToolSubst(r'\| \bcount\b', command=FindTool( + 'count'), verbatim=True, unresolved='fatal'), + ToolSubst(r'\| \bnot\b', command=FindTool('not'), verbatim=True, unresolved='fatal')] + + self.config.substitutions.append(('%python', sys.executable)) + self.add_tool_substitutions( + tool_patterns, [self.config.llvm_tools_dir]) + + def use_llvm_tool(self, name, search_env=None, required=False, quiet=False): + """Find the executable program 'name', optionally using the specified + environment variable as an override before searching the + configuration's PATH.""" + # If the override is specified in the environment, use it without + # validation. + if search_env: + tool = self.config.environment.get(search_env) + if tool: + return tool + + # Otherwise look in the path. + tool = lit.util.which(name, self.config.environment['PATH']) + + if required and not tool: + message = "couldn't find '{}' program".format(name) + if search_env: + message = message + \ + ', try setting {} in your environment'.format(search_env) + self.lit_config.fatal(message) + + if tool: + tool = os.path.normpath(tool) + if not self.lit_config.quiet and not quiet: + self.lit_config.note('using {}: {}'.format(name, tool)) + return tool + + def use_clang(self, required=True): + """Configure the test suite to be able to invoke clang. + + Sets up some environment variables important to clang, locates a + just-built or installed clang, and add a set of standard + substitutions useful to any test suite that makes use of clang. + + """ + # Clear some environment variables that might affect Clang. + # + # This first set of vars are read by Clang, but shouldn't affect tests + # that aren't specifically looking for these features, or are required + # simply to run the tests at all. + # + # FIXME: Should we have a tool that enforces this? 
+ + # safe_env_vars = ('TMPDIR', 'TEMP', 'TMP', 'USERPROFILE', 'PWD', + # 'MACOSX_DEPLOYMENT_TARGET', 'IPHONEOS_DEPLOYMENT_TARGET', + # 'VCINSTALLDIR', 'VC100COMNTOOLS', 'VC90COMNTOOLS', + # 'VC80COMNTOOLS') + possibly_dangerous_env_vars = ['COMPILER_PATH', 'RC_DEBUG_OPTIONS', + 'CINDEXTEST_PREAMBLE_FILE', 'LIBRARY_PATH', + 'CPATH', 'C_INCLUDE_PATH', 'CPLUS_INCLUDE_PATH', + 'OBJC_INCLUDE_PATH', 'OBJCPLUS_INCLUDE_PATH', + 'LIBCLANG_TIMING', 'LIBCLANG_OBJTRACKING', + 'LIBCLANG_LOGGING', 'LIBCLANG_BGPRIO_INDEX', + 'LIBCLANG_BGPRIO_EDIT', 'LIBCLANG_NOTHREADS', + 'LIBCLANG_RESOURCE_USAGE', + 'LIBCLANG_CODE_COMPLETION_LOGGING'] + # Clang/Win32 may refer to %INCLUDE%. vsvarsall.bat sets it. + if platform.system() != 'Windows': + possibly_dangerous_env_vars.append('INCLUDE') + + self.clear_environment(possibly_dangerous_env_vars) + + # Tweak the PATH to include the tools dir and the scripts dir. + # Put Clang first to avoid LLVM from overriding out-of-tree clang builds. + possible_paths = ['clang_tools_dir', 'llvm_tools_dir'] + paths = [getattr(self.config, pp) for pp in possible_paths + if getattr(self.config, pp, None)] + self.with_environment('PATH', paths, append_path=True) + + paths = [self.config.llvm_shlib_dir, self.config.llvm_libs_dir] + self.with_environment('LD_LIBRARY_PATH', paths, append_path=True) + + # Discover the 'clang' and 'clangcc' to use. + + self.config.clang = self.use_llvm_tool( + 'clang', search_env='CLANG', required=required) + + self.config.substitutions.append( + ('%llvmshlibdir', self.config.llvm_shlib_dir)) + self.config.substitutions.append( + ('%pluginext', self.config.llvm_plugin_ext)) + + builtin_include_dir = self.get_clang_builtin_include_dir(self.config.clang) + tool_substitutions = [ + ToolSubst('%clang', command=self.config.clang), + ToolSubst('%clang_analyze_cc1', command='%clang_cc1', extra_args=['-analyze']), + ToolSubst('%clang_cc1', command=self.config.clang, extra_args=['-cc1', '-internal-isystem', builtin_include_dir, '-nostdsysteminc']), + ToolSubst('%clang_cpp', command=self.config.clang, extra_args=['--driver-mode=cpp']), + ToolSubst('%clang_cl', command=self.config.clang, extra_args=['--driver-mode=cl']), + ToolSubst('%clangxx', command=self.config.clang, extra_args=['--driver-mode=g++']), + ] + self.add_tool_substitutions(tool_substitutions) + + self.config.substitutions.append(('%itanium_abi_triple', + self.make_itanium_abi_triple(self.config.target_triple))) + self.config.substitutions.append(('%ms_abi_triple', + self.make_msabi_triple(self.config.target_triple))) + self.config.substitutions.append( + ('%resource_dir', builtin_include_dir)) + + # The host triple might not be set, at least if we're compiling clang from + # an already installed llvm. + if self.config.host_triple and self.config.host_triple != '@LLVM_HOST_TRIPLE@': + self.config.substitutions.append(('%target_itanium_abi_host_triple', + '--target=%s' % self.make_itanium_abi_triple(self.config.host_triple))) + else: + self.config.substitutions.append( + ('%target_itanium_abi_host_triple', '')) + + self.config.substitutions.append( + ('%src_include_dir', self.config.clang_src_dir + '/include')) + + # FIXME: Find nicer way to prohibit this. + self.config.substitutions.append( + (' clang ', """*** Do not use 'clang' in tests, use '%clang'. ***""")) + self.config.substitutions.append( + (' clang\+\+ ', """*** Do not use 'clang++' in tests, use '%clangxx'. ***""")) + self.config.substitutions.append( + (' clang-cc ', + """*** Do not use 'clang-cc' in tests, use '%clang_cc1'. 
***""")) + self.config.substitutions.append( + (' clang -cc1 -analyze ', + """*** Do not use 'clang -cc1 -analyze' in tests, use '%clang_analyze_cc1'. ***""")) + self.config.substitutions.append( + (' clang -cc1 ', + """*** Do not use 'clang -cc1' in tests, use '%clang_cc1'. ***""")) + self.config.substitutions.append( + (' %clang-cc1 ', + """*** invalid substitution, use '%clang_cc1'. ***""")) + self.config.substitutions.append( + (' %clang-cpp ', + """*** invalid substitution, use '%clang_cpp'. ***""")) + self.config.substitutions.append( + (' %clang-cl ', + """*** invalid substitution, use '%clang_cl'. ***""")) + + def use_lld(self, required=True): + """Configure the test suite to be able to invoke lld. + + Sets up some environment variables important to lld, locates a + just-built or installed lld, and add a set of standard + substitutions useful to any test suite that makes use of lld. + + """ + # Tweak the PATH to include the tools dir + tool_dirs = [self.config.llvm_tools_dir] + lib_dirs = [self.config.llvm_libs_dir] + lld_tools_dir = getattr(self.config, 'lld_tools_dir', None) + lld_libs_dir = getattr(self.config, 'lld_libs_dir', None) + + if lld_tools_dir: + tool_dirs = tool_dirs + [lld_tools_dir] + if lld_libs_dir: + lib_dirs = lib_dirs + [lld_libs_dir] + + self.with_environment('PATH', tool_dirs, append_path=True) + self.with_environment('LD_LIBRARY_PATH', lib_dirs, append_path=True) + + self.config.substitutions.append( + (r"\bld.lld\b", 'ld.lld --full-shutdown')) + + tool_patterns = ['ld.lld', 'lld-link', 'lld'] + + self.add_tool_substitutions(tool_patterns, tool_dirs) diff --git a/utils/lit/lit/llvm/subst.py b/utils/lit/lit/llvm/subst.py new file mode 100644 index 000000000000..3c8db1d31ff2 --- /dev/null +++ b/utils/lit/lit/llvm/subst.py @@ -0,0 +1,145 @@ +import os +import re + +import lit.util + +expr = re.compile(r"^(\\)?((\| )?)\W+b(\S+)\\b\W*$") +wordifier = re.compile(r"(\W*)(\b[^\b]+\b)") + + +class FindTool(object): + def __init__(self, name): + self.name = name + + def resolve(self, config, dirs): + # Check for a user explicitely overriding a tool. This allows: + # llvm-lit -D llc="llc -enable-misched -verify-machineinstrs" + command = config.lit_config.params.get(self.name) + if command is None: + # Then check out search paths. + command = lit.util.which(self.name, dirs) + if not command: + return None + + if self.name == 'llc' and os.environ.get('LLVM_ENABLE_MACHINE_VERIFIER') == '1': + command += ' -verify-machineinstrs' + elif self.name == 'llvm-go': + exe = getattr(config.config, 'go_executable', None) + if exe: + command += ' go=' + exe + return command + + +class ToolSubst(object): + """String-like class used to build regex substitution patterns for llvm + tools. + + Handles things like adding word-boundary patterns, and filtering + characters from the beginning an end of a tool name + + """ + + def __init__(self, key, command=None, pre=r'.-^/\<', post='-.', verbatim=False, + unresolved='warn', extra_args=None): + """Construct a ToolSubst. + + key: The text which is to be substituted. + + command: The command to substitute when the key is matched. By default, + this will treat `key` as a tool name and search for it. If it is + a string, it is intereprted as an exact path. If it is an instance of + FindTool, the specified tool name is searched for on disk. + + pre: If specified, the substitution will not find matches where + the character immediately preceding the word-boundary that begins + `key` is any of the characters in the string `pre`. 
+ + post: If specified, the substitution will not find matches where + the character immediately after the word-boundary that ends `key` + is any of the characters specified in the string `post`. + + verbatim: If True, `key` is an exact regex that is passed to the + underlying substitution + + unresolved: Action to take if the tool substitution cannot be + resolved. Valid values: + 'warn' - log a warning but add the substitution anyway. + 'fatal' - Exit the test suite and log a fatal error. + 'break' - Don't add any of the substitutions from the current + group, and return a value indicating a failure. + 'ignore' - Don't add the substitution, and don't log an error + + extra_args: If specified, represents a list of arguments that will be + appended to the tool's substitution. + + explicit_path: If specified, the exact path will be used as a substitution. + Otherwise, the tool will be searched for as if by calling which(tool) + + """ + self.unresolved = unresolved + self.extra_args = extra_args + self.key = key + self.command = command if command is not None else FindTool(key) + if verbatim: + self.regex = key + return + + def not_in(chars, where=''): + if not chars: + return '' + pattern_str = '|'.join(re.escape(x) for x in chars) + return r'(?{}!({}))'.format(where, pattern_str) + + def wordify(word): + match = wordifier.match(word) + introducer = match.group(1) + word = match.group(2) + return introducer + r'\b' + word + r'\b' + + self.regex = not_in(pre, '<') + wordify(key) + not_in(post) + + def resolve(self, config, search_dirs): + # Extract the tool name from the pattern. This relies on the tool + # name being surrounded by \b word match operators. If the + # pattern starts with "| ", include it in the string to be + # substituted. + + tool_match = expr.match(self.regex) + if not tool_match: + return None + + tool_pipe = tool_match.group(2) + tool_name = tool_match.group(4) + + if isinstance(self.command, FindTool): + command_str = self.command.resolve(config, search_dirs) + else: + command_str = str(self.command) + + if command_str: + if self.extra_args: + command_str = ' '.join([command_str] + self.extra_args) + else: + if self.unresolved == 'warn': + # Warn, but still provide a substitution. + config.lit_config.note( + 'Did not find ' + tool_name + ' in %s' % search_dirs) + command_str = os.path.join( + config.config.llvm_tools_dir, tool_name) + elif self.unresolved == 'fatal': + # The function won't even return in this case, this leads to + # sys.exit + config.lit_config.fatal( + 'Did not find ' + tool_name + ' in %s' % search_dirs) + elif self.unresolved == 'break': + # By returning a valid result with an empty command, the + # caller treats this as a failure. + pass + elif self.unresolved == 'ignore': + # By returning None, the caller just assumes there was no + # match in the first place. 
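[illustrative sketch] To make the pre/post guards in the ToolSubst constructor above concrete, here is a small self-contained example. It rebuilds the same kind of pattern with a simplified re-implementation of the not_in/wordify logic (it is not imported from the patch) and shows which command lines the resulting regex does and does not match:

    import re

    def not_in(chars, where=''):
        if not chars:
            return ''
        return r'(?{}!({}))'.format(where, '|'.join(re.escape(c) for c in chars))

    def wordify(word):
        # Simplified: assumes the key begins and ends with word characters.
        return r'\b' + word + r'\b'

    # Defaults from ToolSubst: pre=r'.-^/\<', post='-.'
    pattern = not_in(r'.-^/\<', '<') + wordify('clang') + not_in('-.')

    assert re.search(pattern, 'clang -cc1 %s')          # bare tool name: matched
    assert not re.search(pattern, '/usr/bin/clang %s')  # preceded by '/': left alone
    assert not re.search(pattern, 'clang-check %s')     # followed by '-': left alone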
+ return None + else: + raise 'Unexpected value for ToolSubst.unresolved' + + return (self.regex, tool_pipe, command_str) diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py index f0162464ce33..95a5500a504c 100755 --- a/utils/lit/lit/main.py +++ b/utils/lit/lit/main.py @@ -289,9 +289,10 @@ def main_with_tmp(builtinParameters): debug_group.add_argument("--show-tests", dest="showTests", help="Show all discovered tests", action="store_true", default=False) - debug_group.add_argument("--use-processes", dest="executionStrategy", - help="Run tests in parallel with processes (not threads)", - action="store_const", const="PROCESSES") + debug_group.add_argument("--single-process", dest="singleProcess", + help="Don't run tests in parallel. Intended for debugging " + "single test failures", + action="store_true", default=False) opts = parser.parse_args() args = opts.test_paths @@ -341,6 +342,7 @@ def main_with_tmp(builtinParameters): valgrindLeakCheck = opts.valgrindLeakCheck, valgrindArgs = opts.valgrindArgs, noExecute = opts.noExecute, + singleProcess = opts.singleProcess, debug = opts.debug, isWindows = isWindows, params = userParams, diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py index 3e39bdb92203..c4f9eb2d0fc4 100644 --- a/utils/lit/lit/run.py +++ b/utils/lit/lit/run.py @@ -55,45 +55,7 @@ class Run(object): return _execute_test_impl(test, self.lit_config, self.parallelism_semaphores) - def execute_tests(self, display, jobs, max_time=None): - """ - execute_tests(display, jobs, [max_time]) - - Execute each of the tests in the run, using up to jobs number of - parallel tasks, and inform the display of each individual result. The - provided tests should be a subset of the tests available in this run - object. - - If max_time is non-None, it should be a time in seconds after which to - stop executing tests. - - The display object will have its update method called with each test as - it is completed. The calls are guaranteed to be locked with respect to - one another, but are *not* guaranteed to be called on the same thread as - this method was invoked on. - - Upon completion, each test in the run will have its result - computed. Tests which were not actually executed (for any reason) will - be given an UNRESOLVED result. - """ - # Don't do anything if we aren't going to run any tests. - if not self.tests or jobs == 0: - return - - # Install a console-control signal handler on Windows. - if win32api is not None: - def console_ctrl_handler(type): - print('\nCtrl-C detected, terminating.') - pool.terminate() - pool.join() - abort_now() - return True - win32api.SetConsoleCtrlHandler(console_ctrl_handler, True) - - # Save the display object on the runner so that we can update it from - # our task completion callback. - self.display = display - + def execute_tests_in_pool(self, jobs, max_time): # We need to issue many wait calls, so compute the final deadline and # subtract time.time() from that as we go along. deadline = None @@ -109,9 +71,17 @@ class Run(object): (self.lit_config, self.parallelism_semaphores)) + # Install a console-control signal handler on Windows. 
+ if win32api is not None: + def console_ctrl_handler(type): + print('\nCtrl-C detected, terminating.') + pool.terminate() + pool.join() + abort_now() + return True + win32api.SetConsoleCtrlHandler(console_ctrl_handler, True) + try: - self.failure_count = 0 - self.hit_max_failures = False async_results = [pool.apply_async(worker_run_one_test, args=(test_index, test), callback=self.consume_test_result) @@ -141,6 +111,46 @@ class Run(object): finally: pool.join() + def execute_tests(self, display, jobs, max_time=None): + """ + execute_tests(display, jobs, [max_time]) + + Execute each of the tests in the run, using up to jobs number of + parallel tasks, and inform the display of each individual result. The + provided tests should be a subset of the tests available in this run + object. + + If max_time is non-None, it should be a time in seconds after which to + stop executing tests. + + The display object will have its update method called with each test as + it is completed. The calls are guaranteed to be locked with respect to + one another, but are *not* guaranteed to be called on the same thread as + this method was invoked on. + + Upon completion, each test in the run will have its result + computed. Tests which were not actually executed (for any reason) will + be given an UNRESOLVED result. + """ + # Don't do anything if we aren't going to run any tests. + if not self.tests or jobs == 0: + return + + # Save the display object on the runner so that we can update it from + # our task completion callback. + self.display = display + + self.failure_count = 0 + self.hit_max_failures = False + if self.lit_config.singleProcess: + global child_lit_config + child_lit_config = self.lit_config + for test_index, test in enumerate(self.tests): + result = worker_run_one_test(test_index, test) + self.consume_test_result(result) + else: + self.execute_tests_in_pool(jobs, max_time) + # Mark any tests that weren't run as UNRESOLVED. for test in self.tests: if test.result is None: diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py index 1819d4d1c34f..5f20262d4c35 100644 --- a/utils/lit/lit/util.py +++ b/utils/lit/lit/util.py @@ -1,6 +1,7 @@ import errno import itertools import math +import numbers import os import platform import signal @@ -8,11 +9,47 @@ import subprocess import sys import threading + +def norm_path(path): + path = os.path.realpath(path) + path = os.path.normpath(path) + path = os.path.normcase(path) + return path + + +def is_string(value): + try: + # Python 2 and Python 3 are different here. + return isinstance(value, basestring) + except NameError: + return isinstance(value, str) + + +def pythonize_bool(value): + if value is None: + return False + if type(value) is bool: + return value + if isinstance(value, numbers.Number): + return value != 0 + if is_string(value): + if value.lower() in ('1', 'true', 'on', 'yes'): + return True + if value.lower() in ('', '0', 'false', 'off', 'no'): + return False + raise ValueError('"{}" is not a valid boolean'.format(value)) + + +def make_word_regex(word): + return r'\b' + word + r'\b' + + def to_bytes(s): """Return the parameter as type 'bytes', possibly encoding it. - In Python2, the 'bytes' type is the same as 'str'. In Python3, they are - distinct. + In Python2, the 'bytes' type is the same as 'str'. In Python3, they + are distinct. + """ if isinstance(s, bytes): # In Python2, this branch is taken for both 'str' and 'bytes'. @@ -23,12 +60,14 @@ def to_bytes(s): # Encode to UTF-8 to get 'bytes' data. 
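[illustrative sketch] The new pythonize_bool helper above accepts None, bools, numbers, and a fixed set of strings; anything else raises ValueError. A quick demonstration of its behavior, assuming the lit package is importable (values chosen only for illustration):

    import lit.util

    for raw in (None, True, 0, 2, '', 'ON', 'No', '1', 'false'):
        print(repr(raw), '->', lit.util.pythonize_bool(raw))
    # None -> False    2    -> True     'No'    -> False
    # True -> True     ''   -> False    '1'     -> True
    # 0    -> False    'ON' -> True     'false' -> False

    try:
        lit.util.pythonize_bool('maybe')
    except ValueError as e:
        print(e)   # "maybe" is not a valid boolean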
return s.encode('utf-8') + def to_string(b): """Return the parameter as type 'str', possibly encoding it. In Python2, the 'str' type is the same as 'bytes'. In Python3, the 'str' type is (essentially) Python2's 'unicode' type, and 'bytes' is distinct. + """ if isinstance(b, str): # In Python2, this branch is taken for types 'str' and 'bytes'. @@ -60,28 +99,32 @@ def to_string(b): except AttributeError: raise TypeError('not sure how to convert %s to %s' % (type(b), str)) + def detectCPUs(): - """ - Detects the number of CPUs on a system. Cribbed from pp. + """Detects the number of CPUs on a system. + + Cribbed from pp. + """ # Linux, Unix and MacOS: - if hasattr(os, "sysconf"): - if "SC_NPROCESSORS_ONLN" in os.sysconf_names: + if hasattr(os, 'sysconf'): + if 'SC_NPROCESSORS_ONLN' in os.sysconf_names: # Linux & Unix: - ncpus = os.sysconf("SC_NPROCESSORS_ONLN") + ncpus = os.sysconf('SC_NPROCESSORS_ONLN') if isinstance(ncpus, int) and ncpus > 0: return ncpus - else: # OSX: + else: # OSX: return int(subprocess.check_output(['sysctl', '-n', 'hw.ncpu'], stderr=subprocess.STDOUT)) # Windows: - if "NUMBER_OF_PROCESSORS" in os.environ: - ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]) + if 'NUMBER_OF_PROCESSORS' in os.environ: + ncpus = int(os.environ['NUMBER_OF_PROCESSORS']) if ncpus > 0: # With more than 32 processes, process creation often fails with # "Too many open files". FIXME: Check if there's a better fix. return min(ncpus, 32) - return 1 # Default + return 1 # Default + def mkdir_p(path): """mkdir_p(path) - Make the "path" directory, if it does not exist; this @@ -101,6 +144,7 @@ def mkdir_p(path): if e.errno != errno.EEXIST: raise + def listdir_files(dirname, suffixes=None, exclude_filenames=None): """Yields files in a directory. @@ -127,6 +171,7 @@ def listdir_files(dirname, suffixes=None, exclude_filenames=None): Yields: Filenames as returned by os.listdir (generally, str). + """ if exclude_filenames is None: exclude_filenames = set() @@ -136,20 +181,21 @@ def listdir_files(dirname, suffixes=None, exclude_filenames=None): if (os.path.isdir(os.path.join(dirname, filename)) or filename.startswith('.') or filename in exclude_filenames or - not any(filename.endswith(sfx) for sfx in suffixes)): + not any(filename.endswith(sfx) for sfx in suffixes)): continue yield filename -def which(command, paths = None): + +def which(command, paths=None): """which(command, [paths]) - Look up the given command in the paths string (or the PATH environment variable, if unspecified).""" if paths is None: - paths = os.environ.get('PATH','') + paths = os.environ.get('PATH', '') # Check for absolute match first. if os.path.isfile(command): - return command + return os.path.normpath(command) # Would be nice if Python had a lib function for this. 
if not paths: @@ -167,26 +213,29 @@ def which(command, paths = None): for ext in pathext: p = os.path.join(path, command + ext) if os.path.exists(p) and not os.path.isdir(p): - return p + return os.path.normpath(p) return None + def checkToolsPath(dir, tools): for tool in tools: if not os.path.exists(os.path.join(dir, tool)): return False return True + def whichTools(tools, paths): for path in paths.split(os.pathsep): if checkToolsPath(path, tools): return path return None -def printHistogram(items, title = 'Items'): - items.sort(key = lambda item: item[1]) - maxValue = max([v for _,v in items]) +def printHistogram(items, title='Items'): + items.sort(key=lambda item: item[1]) + + maxValue = max([v for _, v in items]) # Select first "nice" bar height that produces more than 10 bars. power = int(math.ceil(math.log(maxValue, 10))) @@ -199,33 +248,34 @@ def printHistogram(items, title = 'Items'): power -= 1 histo = [set() for i in range(N)] - for name,v in items: - bin = min(int(N * v/maxValue), N-1) + for name, v in items: + bin = min(int(N * v / maxValue), N - 1) histo[bin].add(name) barW = 40 hr = '-' * (barW + 34) print('\nSlowest %s:' % title) print(hr) - for name,value in items[-20:]: + for name, value in items[-20:]: print('%.2fs: %s' % (value, name)) print('\n%s Times:' % title) print(hr) pDigits = int(math.ceil(math.log(maxValue, 10))) - pfDigits = max(0, 3-pDigits) + pfDigits = max(0, 3 - pDigits) if pfDigits: pDigits += pfDigits + 1 cDigits = int(math.ceil(math.log(len(items), 10))) - print("[%s] :: [%s] :: [%s]" % ('Range'.center((pDigits+1)*2 + 3), + print('[%s] :: [%s] :: [%s]' % ('Range'.center((pDigits + 1) * 2 + 3), 'Percentage'.center(barW), - 'Count'.center(cDigits*2 + 1))) + 'Count'.center(cDigits * 2 + 1))) print(hr) - for i,row in enumerate(histo): + for i, row in enumerate(histo): pct = float(len(row)) / len(items) w = int(barW * pct) - print("[%*.*fs,%*.*fs) :: [%s%s] :: [%*d/%*d]" % ( - pDigits, pfDigits, i*barH, pDigits, pfDigits, (i+1)*barH, - '*'*w, ' '*(barW-w), cDigits, len(row), cDigits, len(items))) + print('[%*.*fs,%*.*fs) :: [%s%s] :: [%*d/%*d]' % ( + pDigits, pfDigits, i * barH, pDigits, pfDigits, (i + 1) * barH, + '*' * w, ' ' * (barW - w), cDigits, len(row), cDigits, len(items))) + class ExecuteCommandTimeoutException(Exception): def __init__(self, msg, out, err, exitCode): @@ -238,27 +288,30 @@ class ExecuteCommandTimeoutException(Exception): self.err = err self.exitCode = exitCode + # Close extra file handles on UNIX (on Windows this cannot be done while # also redirecting input). kUseCloseFDs = not (platform.system() == 'Windows') + + def executeCommand(command, cwd=None, env=None, input=None, timeout=0): - """ - Execute command ``command`` (list of arguments or string) - with - * working directory ``cwd`` (str), use None to use the current - working directory - * environment ``env`` (dict), use None for none - * Input to the command ``input`` (str), use string to pass - no input. - * Max execution time ``timeout`` (int) seconds. Use 0 for no timeout. - - Returns a tuple (out, err, exitCode) where - * ``out`` (str) is the standard output of running the command - * ``err`` (str) is the standard error of running the command - * ``exitCode`` (int) is the exitCode of running the command - - If the timeout is hit an ``ExecuteCommandTimeoutException`` - is raised. + """Execute command ``command`` (list of arguments or string) with. 
+ + * working directory ``cwd`` (str), use None to use the current + working directory + * environment ``env`` (dict), use None for none + * Input to the command ``input`` (str), use string to pass + no input. + * Max execution time ``timeout`` (int) seconds. Use 0 for no timeout. + + Returns a tuple (out, err, exitCode) where + * ``out`` (str) is the standard output of running the command + * ``err`` (str) is the standard error of running the command + * ``exitCode`` (int) is the exitCode of running the command + + If the timeout is hit an ``ExecuteCommandTimeoutException`` + is raised. + """ if input is not None: input = to_bytes(input) @@ -284,7 +337,7 @@ def executeCommand(command, cwd=None, env=None, input=None, timeout=0): timerObject = threading.Timer(timeout, killProcess) timerObject.start() - out,err = p.communicate(input=input) + out, err = p.communicate(input=input) exitCode = p.wait() finally: if timerObject != None: @@ -300,7 +353,7 @@ def executeCommand(command, cwd=None, env=None, input=None, timeout=0): out=out, err=err, exitCode=exitCode - ) + ) # Detect Ctrl-C in subprocess. if exitCode == -signal.SIGINT: @@ -308,6 +361,7 @@ def executeCommand(command, cwd=None, env=None, input=None, timeout=0): return out, err, exitCode + def usePlatformSdkOnDarwin(config, lit_config): # On Darwin, support relocatable SDKs by providing Clang with a # default system root path. @@ -325,6 +379,7 @@ def usePlatformSdkOnDarwin(config, lit_config): lit_config.note('using SDKROOT: %r' % sdk_path) config.environment['SDKROOT'] = sdk_path + def findPlatformSdkVersionOnMacOS(config, lit_config): if 'darwin' in config.target_triple: try: @@ -339,15 +394,15 @@ def findPlatformSdkVersionOnMacOS(config, lit_config): return out return None + def killProcessAndChildren(pid): - """ - This function kills a process with ``pid`` and all its - running children (recursively). It is currently implemented - using the psutil module which provides a simple platform - neutral implementation. + """This function kills a process with ``pid`` and all its running children + (recursively). It is currently implemented using the psutil module which + provides a simple platform neutral implementation. + + TODO: Reimplement this without using psutil so we can remove + our dependency on it. - TODO: Reimplement this without using psutil so we can - remove our dependency on it. 
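[illustrative sketch] Given the executeCommand signature documented above, a caller handles timeouts through the dedicated exception rather than a return code; the partial output and exit code travel on the exception object. A brief usage sketch, with an arbitrary POSIX command and timeout chosen only for illustration:

    import lit.util
    from lit.util import ExecuteCommandTimeoutException

    try:
        out, err, rc = lit.util.executeCommand(
            ['sleep', '5'], cwd=None, env=None, input=None, timeout=1)
    except ExecuteCommandTimeoutException as e:
        # Whatever output was produced before the kill is attached here.
        out, err, rc = e.out, e.err, e.exitCode
        print('command timed out; exit code was', rc)

    if rc != 0:
        print('command failed with exit status', rc)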
""" import psutil try: diff --git a/utils/lit/tests/Inputs/config-map-discovery/driver.py b/utils/lit/tests/Inputs/config-map-discovery/driver.py new file mode 100644 index 000000000000..db9141b9b1bf --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/driver.py @@ -0,0 +1,16 @@ +import lit.util +import os +import sys + +main_config = sys.argv[1] +main_config = os.path.realpath(main_config) +main_config = os.path.normcase(main_config) + +config_map = {main_config : sys.argv[2]} +builtin_parameters = {'config_map' : config_map} + +if __name__=='__main__': + from lit.main import main + main_config_dir = os.path.dirname(main_config) + sys.argv = [sys.argv[0]] + sys.argv[3:] + [main_config_dir] + main(builtin_parameters) diff --git a/utils/lit/tests/Inputs/config-map-discovery/invalid-test.txt b/utils/lit/tests/Inputs/config-map-discovery/invalid-test.txt new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/invalid-test.txt diff --git a/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg b/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg new file mode 100644 index 000000000000..c7b303f50a05 --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/lit.alt.cfg @@ -0,0 +1,9 @@ +import lit.formats +import lit.util +config.name = 'config-map' +config.suffixes = ['.txt'] +config.test_format = lit.formats.ShTest() + +import os +config.test_exec_root = os.path.realpath(os.path.dirname(__file__)) +config.test_source_root = os.path.join(config.test_exec_root, "tests") diff --git a/utils/lit/tests/Inputs/config-map-discovery/main-config/lit.cfg b/utils/lit/tests/Inputs/config-map-discovery/main-config/lit.cfg new file mode 100644 index 000000000000..380a05beb4a8 --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/main-config/lit.cfg @@ -0,0 +1 @@ +print("ERROR: lit.cfg invoked!")
\ No newline at end of file diff --git a/utils/lit/tests/Inputs/config-map-discovery/tests/test1.txt b/utils/lit/tests/Inputs/config-map-discovery/tests/test1.txt new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/tests/test1.txt diff --git a/utils/lit/tests/Inputs/config-map-discovery/tests/test2.txt b/utils/lit/tests/Inputs/config-map-discovery/tests/test2.txt new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/utils/lit/tests/Inputs/config-map-discovery/tests/test2.txt diff --git a/utils/lit/tests/Inputs/googletest-format/DummySubDir/OneTest b/utils/lit/tests/Inputs/googletest-format/DummySubDir/OneTest.py index dd49f025b1f2..dd49f025b1f2 100755..100644 --- a/utils/lit/tests/Inputs/googletest-format/DummySubDir/OneTest +++ b/utils/lit/tests/Inputs/googletest-format/DummySubDir/OneTest.py diff --git a/utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest b/utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest.py index f3a90ff4cd67..f3a90ff4cd67 100755..100644 --- a/utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest +++ b/utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest.py diff --git a/utils/lit/tests/Inputs/googletest-upstream-format/DummySubDir/OneTest b/utils/lit/tests/Inputs/googletest-upstream-format/DummySubDir/OneTest.py index d7bc5968f261..d7bc5968f261 100755..100644 --- a/utils/lit/tests/Inputs/googletest-upstream-format/DummySubDir/OneTest +++ b/utils/lit/tests/Inputs/googletest-upstream-format/DummySubDir/OneTest.py diff --git a/utils/lit/tests/Inputs/max-failures/lit.cfg b/utils/lit/tests/Inputs/max-failures/lit.cfg index 50d07566e1cc..605bd7233670 100644 --- a/utils/lit/tests/Inputs/max-failures/lit.cfg +++ b/utils/lit/tests/Inputs/max-failures/lit.cfg @@ -1,6 +1,2 @@ -import lit.formats -config.name = 'shtest-shell' -config.suffixes = ['.txt'] -config.test_format = lit.formats.ShTest() +lit_config.load_config(config, os.path.dirname(__file__) + "/../shtest-shell/lit.cfg") config.test_source_root = os.path.dirname(__file__) + '/../shtest-shell' -config.test_exec_root = None diff --git a/utils/lit/tests/Inputs/py-config-discovery/lit.site.cfg.py b/utils/lit/tests/Inputs/py-config-discovery/lit.site.cfg.py new file mode 100644 index 000000000000..ac273c797c5f --- /dev/null +++ b/utils/lit/tests/Inputs/py-config-discovery/lit.site.cfg.py @@ -0,0 +1,5 @@ +# Load the discovery suite, but with a separate exec root. 
+import os +config.test_exec_root = os.path.dirname(__file__) +config.test_source_root = os.path.join(os.path.dirname(config.test_exec_root), "discovery") +lit_config.load_config(config, os.path.join(config.test_source_root, "lit.cfg")) diff --git a/utils/lit/tests/Inputs/shtest-env/env-u.txt b/utils/lit/tests/Inputs/shtest-env/env-u.txt new file mode 100644 index 000000000000..9cdf9d08850f --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/env-u.txt @@ -0,0 +1,23 @@ +# Check and make sure preset environment variable were set in lit.cfg +# +# RUN: %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-PRESET %s +# +# Check single unset of environment variable +# +# RUN: env -u FOO %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-UNSET-1 %s +# +# Check multiple unsets of environment variables +# +# RUN: env -u FOO -u BAR %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-UNSET-MULTIPLE %s + +# CHECK-ENV-PRESET: BAR = 2 +# CHECK-ENV-PRESET: FOO = 1 + +# CHECK-ENV-UNSET-1: BAR = 2 +# CHECK-ENV-UNSET-1-NOT: FOO + +# CHECK-ENV-UNSET-MULTIPLE-NOT: BAR +# CHECK-ENV-UNSET-MULTIPLE-NOT: FOO diff --git a/utils/lit/tests/Inputs/shtest-env/env.txt b/utils/lit/tests/Inputs/shtest-env/env.txt new file mode 100644 index 000000000000..aa697b0c4081 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/env.txt @@ -0,0 +1,15 @@ +# Check for simple one environment variable setting +# +# RUN: env A_FOO=999 %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-1 %s +# +# Check for multiple environment variable settings +# +# RUN: env A_FOO=1 B_BAR=2 C_OOF=3 %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-MULTIPLE %s + +# CHECK-ENV-1: A_FOO = 999 + +# CHECK-ENV-MULTIPLE: A_FOO = 1 +# CHECK-ENV-MULTIPLE: B_BAR = 2 +# CHECK-ENV-MULTIPLE: C_OOF = 3 diff --git a/utils/lit/tests/Inputs/shtest-env/lit.cfg b/utils/lit/tests/Inputs/shtest-env/lit.cfg new file mode 100644 index 000000000000..23ef60a4b21e --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/lit.cfg @@ -0,0 +1,9 @@ +import lit.formats +config.name = 'shtest-env' +config.suffixes = ['.txt'] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None +config.environment['FOO'] = '1' +config.environment['BAR'] = '2' +config.substitutions.append(('%{python}', sys.executable)) diff --git a/utils/lit/tests/Inputs/shtest-env/mixed.txt b/utils/lit/tests/Inputs/shtest-env/mixed.txt new file mode 100644 index 000000000000..be32d458843b --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/mixed.txt @@ -0,0 +1,18 @@ +# Check for setting and removing one environment variable +# +# RUN: env A_FOO=999 -u FOO %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-1 %s +# +# Check for setting/unsetting multiple environment variables +# +# RUN: env A_FOO=1 -u FOO B_BAR=2 -u BAR C_OOF=3 %{python} print_environment.py \ +# RUN: | FileCheck --check-prefix=CHECK-ENV-MULTIPLE %s + +# CHECK-ENV-1: A_FOO = 999 +# CHECK-ENV-1-NOT: FOO + +# CHECK-ENV-MULTIPLE: A_FOO = 1 +# CHECK-ENV-MULTIPLE-NOT: BAR +# CHECK-ENV-MULTIPLE: B_BAR = 2 +# CHECK-ENV-MULTIPLE: C_OOF = 3 +# CHECK-ENV-MULTIPLE-NOT: FOO diff --git a/utils/lit/tests/Inputs/shtest-env/print_environment.py b/utils/lit/tests/Inputs/shtest-env/print_environment.py new file mode 100644 index 000000000000..1add4079d58f --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/print_environment.py @@ -0,0 +1,8 @@ +#!/usr/bin/env 
python + +import os + +sorted_environment = sorted(os.environ.items()) + +for name,value in sorted_environment: + print name,'=',value diff --git a/utils/lit/tests/Inputs/shtest-env/shtest-env.py b/utils/lit/tests/Inputs/shtest-env/shtest-env.py new file mode 100644 index 000000000000..fc5f973e676f --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-env/shtest-env.py @@ -0,0 +1,3 @@ +# Check the env command +# +# RUN: %{lit} -a -v %{inputs}/shtest-env diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_bad_encoding.txt b/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_bad_encoding.txt index f6157e66c97c..ce38831e32ed 100644 --- a/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_bad_encoding.txt +++ b/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_bad_encoding.txt @@ -1,5 +1,5 @@ # Run a command that fails with error on stdout. # -# RUN: %S/write-bad-encoding.sh +# RUN: "%{python}" %S/write-bad-encoding.py # RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.py b/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.py new file mode 100644 index 000000000000..a5a2bc9da8af --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python + +import sys + +getattr(sys.stdout, "buffer", sys.stdout).write(b"a line with bad encoding: \xc2.") +sys.stdout.flush() diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.sh b/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.sh deleted file mode 100755 index 6b622cb232e2..000000000000 --- a/utils/lit/tests/Inputs/shtest-format/external_shell/write-bad-encoding.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -echo "a line with bad encoding: Â." diff --git a/utils/lit/tests/Inputs/shtest-format/lit.cfg b/utils/lit/tests/Inputs/shtest-format/lit.cfg index 9b47985a3d86..0d6488848b4f 100644 --- a/utils/lit/tests/Inputs/shtest-format/lit.cfg +++ b/utils/lit/tests/Inputs/shtest-format/lit.cfg @@ -6,3 +6,4 @@ config.test_source_root = None config.test_exec_root = None config.target_triple = 'x86_64-unknown-unknown' config.available_features.add('a-present-feature') +config.substitutions.append(('%{python}', sys.executable)) diff --git a/utils/lit/tests/Inputs/shtest-shell/check_path.py b/utils/lit/tests/Inputs/shtest-shell/check_path.py new file mode 100644 index 000000000000..c1d279700de4 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/check_path.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +import os +import sys + + +def check_path(argv): + if len(argv) < 3: + print("Wrong number of args") + return 1 + + type = argv[1] + paths = argv[2:] + exit_code = 0 + + if type == 'dir': + for idx, dir in enumerate(paths): + print(os.path.isdir(dir)) + elif type == 'file': + for idx, file in enumerate(paths): + print(os.path.isfile(file)) + else: + print("Unrecognised type {}".format(type)) + exit_code = 1 + return exit_code + +if __name__ == '__main__': + sys.exit (check_path (sys.argv)) diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt new file mode 100644 index 000000000000..81888cf81974 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported diff (cannot be part of a pipeline). 
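[illustrative sketch] The env-u.txt and mixed.txt tests above exercise an 'env' builtin that accepts VAR=VALUE assignments and -u VAR unsets before the command. The following is one way such argument handling can be modeled; it is a standalone sketch, not the actual lit TestRunner code:

    def apply_env_args(args, environment):
        # Returns (modified environment, remaining command argv).
        env = dict(environment)
        i = 0
        while i < len(args):
            arg = args[i]
            if arg == '-u':
                i += 1
                env.pop(args[i], None)      # unset, ignore if absent
            elif '=' in arg:
                key, value = arg.split('=', 1)
                env[key] = value            # set or override
            else:
                break                       # first non-assignment is the command
            i += 1
        return env, args[i:]

    env, cmd = apply_env_args(['A_FOO=1', '-u', 'FOO', 'printenv'],
                              {'FOO': '1', 'BAR': '2'})
    assert env == {'A_FOO': '1', 'BAR': '2'}
    assert cmd == ['printenv']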
+# +# RUN: diff diff-error-0.txt diff-error-0.txt | echo Output diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-1.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-1.txt new file mode 100644 index 000000000000..9836e0ea9d45 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-1.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported diff (not support the -B option). +# +# RUN: diff -B temp1.txt temp2.txt diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-2.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-2.txt new file mode 100644 index 000000000000..9c0da6aa6f69 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-2.txt @@ -0,0 +1,3 @@ +# Check error on an internal shell error (missing tofile) +# +# RUN: diff temp.txt diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-3.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-3.txt new file mode 100644 index 000000000000..31128193c019 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-3.txt @@ -0,0 +1,3 @@ +# Check error on an internal shell error (unable to find compared files) +# +# RUN: diff temp.txt temp1.txt diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-4.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-4.txt new file mode 100644 index 000000000000..8569e33e284a --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-4.txt @@ -0,0 +1,5 @@ +# Check exit code error (file's contents are different) +# +# RUN: echo "hello-first" > %t +# RUN: echo "hello-second" > %t1 +# RUN: diff %t %t1 diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-5.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-5.txt new file mode 100644 index 000000000000..6d8d2e5c334d --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-5.txt @@ -0,0 +1,3 @@ +# Check error on an internal shell error (missing operand) +# +# RUN: diff -u diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-error-6.txt b/utils/lit/tests/Inputs/shtest-shell/diff-error-6.txt new file mode 100644 index 000000000000..cc13d13c6475 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/diff-error-6.txt @@ -0,0 +1,3 @@ +# Check error on an internal shell error (extra operand) +# +# RUN: diff -u a.txt b.txt c.txt diff --git a/utils/lit/tests/Inputs/shtest-shell/lit.cfg b/utils/lit/tests/Inputs/shtest-shell/lit.cfg index 7f31129ad114..761dc6748855 100644 --- a/utils/lit/tests/Inputs/shtest-shell/lit.cfg +++ b/utils/lit/tests/Inputs/shtest-shell/lit.cfg @@ -4,3 +4,4 @@ config.suffixes = ['.txt'] config.test_format = lit.formats.ShTest() config.test_source_root = None config.test_exec_root = None +config.substitutions.append(('%{python}', sys.executable)) diff --git a/utils/lit/tests/Inputs/shtest-shell/mkdir-error-0.txt b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-0.txt new file mode 100644 index 000000000000..c29d42110670 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-0.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported mkdir (cannot be part of a pipeline). +# +# RUN: mkdir -p temp | rm -rf temp diff --git a/utils/lit/tests/Inputs/shtest-shell/mkdir-error-1.txt b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-1.txt new file mode 100644 index 000000000000..8931f3d54eb4 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-1.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported mkdir (only does not support -m option). 
+# +# RUN: mkdir -p -m 777 temp diff --git a/utils/lit/tests/Inputs/shtest-shell/mkdir-error-2.txt b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-2.txt new file mode 100644 index 000000000000..401dbecf14b2 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/mkdir-error-2.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported mkdir (missing operand). +# +# RUN: mkdir -p diff --git a/utils/lit/tests/Inputs/shtest-shell/redirects.txt b/utils/lit/tests/Inputs/shtest-shell/redirects.txt index 6be88b67ce1b..f90c2b7868b7 100644 --- a/utils/lit/tests/Inputs/shtest-shell/redirects.txt +++ b/utils/lit/tests/Inputs/shtest-shell/redirects.txt @@ -17,13 +17,13 @@ # Check stderr redirect (2> and 2>>). # # RUN: echo "not-present" > %t.stderr-write -# RUN: %S/write-to-stderr.sh 2> %t.stderr-write +# RUN: "%{python}" %S/write-to-stderr.py 2> %t.stderr-write # RUN: FileCheck --check-prefix=STDERR-WRITE < %t.stderr-write %s # # STDERR-WRITE-NOT: not-present # STDERR-WRITE: a line on stderr # -# RUN: %S/write-to-stderr.sh 2>> %t.stderr-write +# RUN: "%{python}" %S/write-to-stderr.py 2>> %t.stderr-write # RUN: FileCheck --check-prefix=STDERR-APPEND < %t.stderr-write %s # # STDERR-APPEND: a line on stderr @@ -33,7 +33,7 @@ # Check combined redirect (&>). # # RUN: echo "not-present" > %t.combined -# RUN: %S/write-to-stdout-and-stderr.sh &> %t.combined +# RUN: "%{python}" %S/write-to-stdout-and-stderr.py &> %t.combined # RUN: FileCheck --check-prefix=COMBINED-WRITE < %t.combined %s # # COMBINED-WRITE-NOT: not-present diff --git a/utils/lit/tests/Inputs/shtest-shell/rm-error-0.txt b/utils/lit/tests/Inputs/shtest-shell/rm-error-0.txt new file mode 100644 index 000000000000..52966a813c4a --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/rm-error-0.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported rm. (cannot be part of a pipeline) +# +# RUN: rm -rf temp | echo "hello" diff --git a/utils/lit/tests/Inputs/shtest-shell/rm-error-1.txt b/utils/lit/tests/Inputs/shtest-shell/rm-error-1.txt new file mode 100644 index 000000000000..5065ea744207 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/rm-error-1.txt @@ -0,0 +1,3 @@ +# Check error on a unsupported rm (only does not support -v option). +# +# RUN: rm -f -v temp diff --git a/utils/lit/tests/Inputs/shtest-shell/rm-error-2.txt b/utils/lit/tests/Inputs/shtest-shell/rm-error-2.txt new file mode 100644 index 000000000000..71bfe0427b15 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/rm-error-2.txt @@ -0,0 +1,4 @@ +# Check error on a unsupported rm (only supports -f option and in combination with -r). +# +#### RUN: rm -r hello +# RUN: rm hello diff --git a/utils/lit/tests/Inputs/shtest-shell/rm-error-3.txt b/utils/lit/tests/Inputs/shtest-shell/rm-error-3.txt new file mode 100644 index 000000000000..1bab79aad082 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/rm-error-3.txt @@ -0,0 +1,4 @@ +# Check error on a unsupported rm (can't remove test since it is a directory). +# +# RUN: mkdir -p test +# RUN: rm -f test diff --git a/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt b/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt new file mode 100644 index 000000000000..d5cbf863e735 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt @@ -0,0 +1,87 @@ +# Check rm file operations. +# Check force remove commands success whether the file does or doesn't exist. 
+# +# RUN: rm -f %t.write +# RUN: "%{python}" %S/check_path.py file %t.write > %t.out +# RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s +# RUN: echo "create a temp file" > %t.write +# RUN: "%{python}" %S/check_path.py file %t.write > %t.out +# RUN: FileCheck --check-prefix=FILE-EXIST < %t.out %s +# RUN: rm -f %t.write +# RUN: "%{python}" %S/check_path.py file %t.write > %t.out +# RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s +# +# REMOVE-FILE: False +# FILE-EXIST: True +# +# Check mkdir and rm folder operations. +# Check force remove commands success whether the directory does or doesn't exist. +# +# Check the mkdir command with -p option. +# RUN: rm -f -r %T/test +# RUN: "%{python}" %S/check_path.py dir %T/test > %t.out +# RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s +# RUN: mkdir -p %T/test +# RUN: "%{python}" %S/check_path.py dir %T/test > %t.out +# RUN: FileCheck --check-prefix=MAKE-PARENT-DIR < %t.out %s +# RUN: rm -f %T/test || true +# RUN: rm -f -r %T/test +# RUN: "%{python}" %S/check_path.py dir %T/test > %t.out +# RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s +# +# MAKE-PARENT-DIR: True +# REMOVE-PARENT-DIR: False +# +# Check the mkdir command without -p option. +# +# RUN: rm -rf %T/test1 +# RUN: mkdir %T/test1 +# RUN: "%{python}" %S/check_path.py dir %T/test1 > %t.out +# RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s +# RUN: cd %T/test1 && mkdir foo +# RUN: "%{python}" %S/check_path.py dir %T/test1 > %t.out +# RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s +# RUN: cd %T && rm -rf %T/test1 +# RUN: "%{python}" %S/check_path.py dir %T/test1 > %t.out +# RUN: FileCheck --check-prefix=REMOVE-DIR < %t.out %s +# +# MAKE-DIR: True +# REMOVE-DIR: False +# +# Check creating and removing multiple folders and rm * operation. +# +# RUN: rm -rf %T/test +# RUN: mkdir -p %T/test/test1 %T/test/test2 +# RUN: "%{python}" %S/check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out +# RUN: FileCheck --check-prefix=DIRS-EXIST < %t.out %s +# RUN: mkdir %T/test || true +# RUN: echo "create a temp file" > %T/test/temp.write +# RUN: echo "create a temp1 file" > %T/test/test1/temp1.write +# RUN: echo "create a temp2 file" > %T/test/test2/temp2.write +# RUN: "%{python}" %S/check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out +# RUN: FileCheck --check-prefix=FILES-EXIST < %t.out %s +# RUN: rm -r -f %T/* +# RUN: "%{python}" %S/check_path.py dir %T/test > %t.out +# RUN: FileCheck --check-prefix=REMOVE-ALL < %t.out %s +# +# DIRS-EXIST: True +# DIRS-EXIST-NEXT: True +# DIRS-EXIST-NEXT: True +# FILES-EXIST: True +# FILES-EXIST-NEXT: True +# FILES-EXIST-NEXT: True +# REMOVE-ALL: False +# +# Check diff operations. 
+# +# RUN: echo "hello" > %t.stdout +# RUN: echo "hello" > %t1.stdout +# RUN: diff %t.stdout %t1.stdout +# RUN: diff -u %t.stdout %t1.stdout +# RUN: echo "hello-2" > %t1.stdout +# RUN: diff %t.stdout %t1.stdout || true +# +# RUN: mkdir -p %T/dir1 %T/dir2 +# RUN: cd %T/dir1 && echo "hello" > temp1.txt +# RUN: cd %T/dir2 && echo "hello" > temp2.txt +# RUN: diff temp2.txt ../dir1/temp1.txt diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.py b/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.py new file mode 100644 index 000000000000..9463251d823a --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import sys + + +sys.stderr.write("a line on stderr\n") +sys.stderr.flush() diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh b/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh deleted file mode 100755 index ead3fd3ce377..000000000000 --- a/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -echo "a line on stderr" 1>&2 diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.py b/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.py new file mode 100644 index 000000000000..357089d4899b --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +import sys + + +sys.stdout.write("a line on stdout\n") +sys.stdout.flush() + +sys.stderr.write("a line on stderr\n") +sys.stderr.flush() diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh b/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh deleted file mode 100755 index f20de5d9042d..000000000000 --- a/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -echo "a line on stdout" -echo "a line on stderr" 1>&2 diff --git a/utils/lit/tests/discovery.py b/utils/lit/tests/discovery.py index 55e54088b587..dbabeb39f19f 100644 --- a/utils/lit/tests/discovery.py +++ b/utils/lit/tests/discovery.py @@ -6,17 +6,17 @@ # RUN: FileCheck --check-prefix=CHECK-BASIC-OUT < %t.out %s # RUN: FileCheck --check-prefix=CHECK-BASIC-ERR < %t.err %s # -# CHECK-BASIC-ERR: loading suite config '{{.*}}/discovery/lit.cfg' -# CHECK-BASIC-ERR-DAG: loading suite config '{{.*}}/discovery/subsuite/lit.cfg' -# CHECK-BASIC-ERR-DAG: loading local config '{{.*}}/discovery/subdir/lit.local.cfg' +# CHECK-BASIC-ERR: loading suite config '{{.*(/|\\\\)discovery(/|\\\\)lit.cfg}}' +# CHECK-BASIC-ERR-DAG: loading suite config '{{.*(/|\\\\)discovery(/|\\\\)subsuite(/|\\\\)lit.cfg}}' +# CHECK-BASIC-ERR-DAG: loading local config '{{.*(/|\\\\)discovery(/|\\\\)subdir(/|\\\\)lit.local.cfg}}' # # CHECK-BASIC-OUT: -- Test Suites -- # CHECK-BASIC-OUT: sub-suite - 2 tests -# CHECK-BASIC-OUT: Source Root: {{.*/discovery/subsuite$}} -# CHECK-BASIC-OUT: Exec Root : {{.*/discovery/subsuite$}} +# CHECK-BASIC-OUT: Source Root: {{.*[/\\]discovery[/\\]subsuite$}} +# CHECK-BASIC-OUT: Exec Root : {{.*[/\\]discovery[/\\]subsuite$}} # CHECK-BASIC-OUT: top-level-suite - 3 tests -# CHECK-BASIC-OUT: Source Root: {{.*/discovery$}} -# CHECK-BASIC-OUT: Exec Root : {{.*/discovery$}} +# CHECK-BASIC-OUT: Source Root: {{.*[/\\]discovery$}} +# CHECK-BASIC-OUT: Exec Root : {{.*[/\\]discovery$}} # # CHECK-BASIC-OUT: -- Available Tests -- # CHECK-BASIC-OUT: sub-suite :: test-one @@ -25,6 +25,28 @@ # CHECK-BASIC-OUT: top-level-suite :: test-one # CHECK-BASIC-OUT: top-level-suite :: test-two +# 
Check discovery when providing the special builtin 'config_map' +# RUN: %{python} %{inputs}/config-map-discovery/driver.py \ +# RUN: %{inputs}/config-map-discovery/main-config/lit.cfg \ +# RUN: %{inputs}/config-map-discovery/lit.alt.cfg \ +# RUN: --single-process --debug --show-tests --show-suites > %t.out 2> %t.err +# RUN: FileCheck --check-prefix=CHECK-CONFIG-MAP-OUT < %t.out %s +# RUN: FileCheck --check-prefix=CHECK-CONFIG-MAP-ERR < %t.err %s + +# CHECK-CONFIG-MAP-OUT-NOT: ERROR: lit.cfg invoked +# CHECK-CONFIG-MAP-OUT: -- Test Suites -- +# CHECK-CONFIG-MAP-OUT: config-map - 2 tests +# CHECK-CONFIG-MAP-OUT: Source Root: {{.*[/\\]config-map-discovery[/\\]tests}} +# CHECK-CONFIG-MAP-OUT: Exec Root : {{.*[/\\]tests[/\\]Inputs[/\\]config-map-discovery}} +# CHECK-CONFIG-MAP-OUT: -- Available Tests -- +# CHECK-CONFIG-MAP-OUT-NOT: invalid-test.txt +# CHECK-CONFIG-MAP-OUT: config-map :: test1.txt +# CHECK-CONFIG-MAP-OUT: config-map :: test2.txt + +# CHECK-CONFIG-MAP-ERR: loading suite config '{{.*}}lit.alt.cfg' +# CHECK-CONFIG-MAP-ERR: loaded config '{{.*}}lit.alt.cfg' +# CHECK-CONFIG-MAP-ERR: resolved input '{{.*(/|\\\\)config-map-discovery(/|\\\\)main-config}}' to 'config-map'::() + # Check discovery when exact test names are given. # @@ -38,6 +60,34 @@ # CHECK-EXACT-TEST: sub-suite :: test-one # CHECK-EXACT-TEST: top-level-suite :: subdir/test-three +# Check discovery when config files end in .py +# RUN: %{lit} %{inputs}/py-config-discovery \ +# RUN: -j 1 --debug --show-tests --show-suites \ +# RUN: -v > %t.out 2> %t.err +# RUN: FileCheck --check-prefix=CHECK-PYCONFIG-OUT < %t.out %s +# RUN: FileCheck --check-prefix=CHECK-PYCONFIG-ERR < %t.err %s +# +# CHECK-PYCONFIG-ERR: loading suite config '{{.*(/|\\\\)py-config-discovery(/|\\\\)lit.site.cfg.py}}' +# CHECK-PYCONFIG-ERR: load_config from '{{.*(/|\\\\)discovery(/|\\\\)lit.cfg}}' +# CHECK-PYCONFIG-ERR: loaded config '{{.*(/|\\\\)discovery(/|\\\\)lit.cfg}}' +# CHECK-PYCONFIG-ERR: loaded config '{{.*(/|\\\\)py-config-discovery(/|\\\\)lit.site.cfg.py}}' +# CHECK-PYCONFIG-ERR-DAG: loading suite config '{{.*(/|\\\\)discovery(/|\\\\)subsuite(/|\\\\)lit.cfg}}' +# CHECK-PYCONFIG-ERR-DAG: loading local config '{{.*(/|\\\\)discovery(/|\\\\)subdir(/|\\\\)lit.local.cfg}}' +# +# CHECK-PYCONFIG-OUT: -- Test Suites -- +# CHECK-PYCONFIG-OUT: sub-suite - 2 tests +# CHECK-PYCONFIG-OUT: Source Root: {{.*[/\\]discovery[/\\]subsuite$}} +# CHECK-PYCONFIG-OUT: Exec Root : {{.*[/\\]discovery[/\\]subsuite$}} +# CHECK-PYCONFIG-OUT: top-level-suite - 3 tests +# CHECK-PYCONFIG-OUT: Source Root: {{.*[/\\]discovery$}} +# CHECK-PYCONFIG-OUT: Exec Root : {{.*[/\\]py-config-discovery$}} +# +# CHECK-PYCONFIG-OUT: -- Available Tests -- +# CHECK-PYCONFIG-OUT: sub-suite :: test-one +# CHECK-PYCONFIG-OUT: sub-suite :: test-two +# CHECK-PYCONFIG-OUT: top-level-suite :: subdir/test-three +# CHECK-PYCONFIG-OUT: top-level-suite :: test-one +# CHECK-PYCONFIG-OUT: top-level-suite :: test-two # Check discovery when using an exec path. 
# @@ -47,20 +97,20 @@ # RUN: FileCheck --check-prefix=CHECK-ASEXEC-OUT < %t.out %s # RUN: FileCheck --check-prefix=CHECK-ASEXEC-ERR < %t.err %s # -# CHECK-ASEXEC-ERR: loading suite config '{{.*}}/exec-discovery/lit.site.cfg' -# CHECK-ASEXEC-ERR: load_config from '{{.*}}/discovery/lit.cfg' -# CHECK-ASEXEC-ERR: loaded config '{{.*}}/discovery/lit.cfg' -# CHECK-ASEXEC-ERR: loaded config '{{.*}}/exec-discovery/lit.site.cfg' -# CHECK-ASEXEC-ERR-DAG: loading suite config '{{.*}}/discovery/subsuite/lit.cfg' -# CHECK-ASEXEC-ERR-DAG: loading local config '{{.*}}/discovery/subdir/lit.local.cfg' +# CHECK-ASEXEC-ERR: loading suite config '{{.*(/|\\\\)exec-discovery(/|\\\\)lit.site.cfg}}' +# CHECK-ASEXEC-ERR: load_config from '{{.*(/|\\\\)discovery(/|\\\\)lit.cfg}}' +# CHECK-ASEXEC-ERR: loaded config '{{.*(/|\\\\)discovery(/|\\\\)lit.cfg}}' +# CHECK-ASEXEC-ERR: loaded config '{{.*(/|\\\\)exec-discovery(/|\\\\)lit.site.cfg}}' +# CHECK-ASEXEC-ERR-DAG: loading suite config '{{.*(/|\\\\)discovery(/|\\\\)subsuite(/|\\\\)lit.cfg}}' +# CHECK-ASEXEC-ERR-DAG: loading local config '{{.*(/|\\\\)discovery(/|\\\\)subdir(/|\\\\)lit.local.cfg}}' # # CHECK-ASEXEC-OUT: -- Test Suites -- # CHECK-ASEXEC-OUT: sub-suite - 2 tests -# CHECK-ASEXEC-OUT: Source Root: {{.*/discovery/subsuite$}} -# CHECK-ASEXEC-OUT: Exec Root : {{.*/discovery/subsuite$}} +# CHECK-ASEXEC-OUT: Source Root: {{.*[/\\]discovery[/\\]subsuite$}} +# CHECK-ASEXEC-OUT: Exec Root : {{.*[/\\]discovery[/\\]subsuite$}} # CHECK-ASEXEC-OUT: top-level-suite - 3 tests -# CHECK-ASEXEC-OUT: Source Root: {{.*/discovery$}} -# CHECK-ASEXEC-OUT: Exec Root : {{.*/exec-discovery$}} +# CHECK-ASEXEC-OUT: Source Root: {{.*[/\\]discovery$}} +# CHECK-ASEXEC-OUT: Exec Root : {{.*[/\\]exec-discovery$}} # # CHECK-ASEXEC-OUT: -- Available Tests -- # CHECK-ASEXEC-OUT: sub-suite :: test-one @@ -90,8 +140,15 @@ # RUN: -j 1 --show-tests --show-suites -v > %t.out # RUN: FileCheck --check-prefix=CHECK-ASEXEC-INTREE < %t.out %s # +# Try it again after cd'ing into the test suite using a short relative path. +# +# RUN: cd %{inputs}/exec-discovery-in-tree/obj/ +# RUN: %{lit} . \ +# RUN: -j 1 --show-tests --show-suites -v > %t.out +# RUN: FileCheck --check-prefix=CHECK-ASEXEC-INTREE < %t.out %s +# # CHECK-ASEXEC-INTREE: exec-discovery-in-tree-suite - 1 tests -# CHECK-ASEXEC-INTREE-NEXT: Source Root: {{.*/exec-discovery-in-tree$}} -# CHECK-ASEXEC-INTREE-NEXT: Exec Root : {{.*/exec-discovery-in-tree/obj$}} +# CHECK-ASEXEC-INTREE-NEXT: Source Root: {{.*[/\\]exec-discovery-in-tree$}} +# CHECK-ASEXEC-INTREE-NEXT: Exec Root : {{.*[/\\]exec-discovery-in-tree[/\\]obj$}} # CHECK-ASEXEC-INTREE-NEXT: -- Available Tests -- # CHECK-ASEXEC-INTREE-NEXT: exec-discovery-in-tree-suite :: test-one diff --git a/utils/lit/tests/googletest-format.py b/utils/lit/tests/googletest-format.py index a62fd1b3ccaf..094c6cfc514c 100644 --- a/utils/lit/tests/googletest-format.py +++ b/utils/lit/tests/googletest-format.py @@ -1,19 +1,22 @@ # Check the various features of the GoogleTest format. # # RUN: not %{lit} -j 1 -v %{inputs}/googletest-format > %t.out +# FIXME: Temporarily dump test output so we can debug failing tests on +# buildbots. +# RUN: cat %t.out # RUN: FileCheck < %t.out %s # # END. 
# CHECK: -- Testing: -# CHECK: PASS: googletest-format :: DummySubDir/OneTest/FirstTest.subTestA -# CHECK: FAIL: googletest-format :: DummySubDir/OneTest/FirstTest.subTestB -# CHECK-NEXT: *** TEST 'googletest-format :: DummySubDir/OneTest/FirstTest.subTestB' FAILED *** +# CHECK: PASS: googletest-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestA +# CHECK: FAIL: googletest-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestB +# CHECK-NEXT: *** TEST 'googletest-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestB' FAILED *** # CHECK-NEXT: I am subTest B, I FAIL # CHECK-NEXT: And I have two lines of output # CHECK: *** -# CHECK: PASS: googletest-format :: DummySubDir/OneTest/ParameterizedTest/0.subTest -# CHECK: PASS: googletest-format :: DummySubDir/OneTest/ParameterizedTest/1.subTest +# CHECK: PASS: googletest-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/ParameterizedTest/0.subTest +# CHECK: PASS: googletest-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/ParameterizedTest/1.subTest # CHECK: Failing Tests (1) # CHECK: Expected Passes : 3 # CHECK: Unexpected Failures: 1 diff --git a/utils/lit/tests/googletest-timeout.py b/utils/lit/tests/googletest-timeout.py index 46acf32b3a61..8b7d10fc1f03 100644 --- a/utils/lit/tests/googletest-timeout.py +++ b/utils/lit/tests/googletest-timeout.py @@ -13,9 +13,9 @@ # RUN: FileCheck < %t.cfgset.out %s # CHECK: -- Testing: -# CHECK: PASS: googletest-timeout :: DummySubDir/OneTest/FirstTest.subTestA -# CHECK: TIMEOUT: googletest-timeout :: DummySubDir/OneTest/FirstTest.subTestB -# CHECK: TIMEOUT: googletest-timeout :: DummySubDir/OneTest/FirstTest.subTestC +# CHECK: PASS: googletest-timeout :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestA +# CHECK: TIMEOUT: googletest-timeout :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestB +# CHECK: TIMEOUT: googletest-timeout :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestC # CHECK: Expected Passes : 1 # CHECK: Individual Timeouts: 2 diff --git a/utils/lit/tests/googletest-upstream-format.py b/utils/lit/tests/googletest-upstream-format.py index 1fc7c7c4a5ad..938740d80e7b 100644 --- a/utils/lit/tests/googletest-upstream-format.py +++ b/utils/lit/tests/googletest-upstream-format.py @@ -6,15 +6,15 @@ # END. 
# CHECK: -- Testing: -# CHECK: PASS: googletest-upstream-format :: DummySubDir/OneTest/FirstTest.subTestA -# CHECK: FAIL: googletest-upstream-format :: DummySubDir/OneTest/FirstTest.subTestB -# CHECK-NEXT: *** TEST 'googletest-upstream-format :: DummySubDir/OneTest/FirstTest.subTestB' FAILED *** +# CHECK: PASS: googletest-upstream-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestA +# CHECK: FAIL: googletest-upstream-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestB +# CHECK-NEXT: *** TEST 'googletest-upstream-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/FirstTest.subTestB' FAILED *** # CHECK-NEXT: Running main() from gtest_main.cc # CHECK-NEXT: I am subTest B, I FAIL # CHECK-NEXT: And I have two lines of output # CHECK: *** -# CHECK: PASS: googletest-upstream-format :: DummySubDir/OneTest/ParameterizedTest/0.subTest -# CHECK: PASS: googletest-upstream-format :: DummySubDir/OneTest/ParameterizedTest/1.subTest +# CHECK: PASS: googletest-upstream-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/ParameterizedTest/0.subTest +# CHECK: PASS: googletest-upstream-format :: {{[Dd]ummy[Ss]ub[Dd]ir}}/OneTest.py/ParameterizedTest/1.subTest # CHECK: Failing Tests (1) # CHECK: Expected Passes : 3 # CHECK: Unexpected Failures: 1 diff --git a/utils/lit/tests/lit.cfg b/utils/lit/tests/lit.cfg index 4b38241d5a7d..75d1b5eac857 100644 --- a/utils/lit/tests/lit.cfg +++ b/utils/lit/tests/lit.cfg @@ -26,12 +26,20 @@ config.test_exec_root = config.test_source_root config.target_triple = '(unused)' src_root = os.path.join(config.test_source_root, '..') -config.environment['PYTHONPATH'] = src_root +llvm_src_root = getattr(config, 'llvm_src_root', None) +if llvm_src_root != None: + # ``src_root`` may be in LLVM's binary build directory which does not contain + # ``lit.py``, so use `llvm_src_root` instead. + lit_path = os.path.join(llvm_src_root, 'utils', 'lit') +else: + lit_path = src_root + +config.environment['PYTHONPATH'] = lit_path # Required because some tests import the lit module config.substitutions.append(('%{src_root}', src_root)) config.substitutions.append(('%{inputs}', os.path.join( src_root, 'tests', 'Inputs'))) config.substitutions.append(('%{lit}', "%%{python} %s" % ( - os.path.join(src_root, 'lit.py'),))) + os.path.join(lit_path, 'lit.py'),))) config.substitutions.append(('%{python}', sys.executable)) # Enable coverage.py reporting, assuming the coverage module has been installed @@ -52,3 +60,14 @@ try: except ImportError: lit_config.warning('Could not import psutil. Some tests will be skipped and' ' the --timeout command line argument will not work.') + +if sys.platform.startswith('win') or sys.platform.startswith('cygwin'): + config.available_features.add('windows') + +# Add llvm and lit tools directories if this config is being loaded indirectly. +path = config.environment['PATH'] +for attribute in ('llvm_tools_dir', 'lit_tools_dir'): + directory = getattr(config, attribute, None) + if directory: + path = os.path.pathsep.join((directory, path)) +config.environment['PATH'] = path diff --git a/utils/lit/tests/lit.site.cfg.in b/utils/lit/tests/lit.site.cfg.in new file mode 100644 index 000000000000..693364c7c973 --- /dev/null +++ b/utils/lit/tests/lit.site.cfg.in @@ -0,0 +1,20 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" +config.llvm_src_root = "@LLVM_SOURCE_DIR@" +config.llvm_obj_root = "@LLVM_BINARY_DIR@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" + +# Support substitution of the tools_dir with user parameters. 
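[illustrative sketch] The parameter substitution referred to just above exists because the CMake-configured tools directory may contain a %-style placeholder that is only resolvable at run time. A toy illustration of the formatting the site config performs below; the placeholder name 'build_mode' and the paths are examples only:

    # If CMake configured something like:
    llvm_tools_dir = '/work/llvm/build/%(build_mode)s/bin'   # example value

    # ...then lit must be invoked with --param build_mode=<value>, and the
    # site config resolves it with ordinary %-formatting:
    params = {'build_mode': 'Release'}                        # from --param
    print(llvm_tools_dir % params)    # /work/llvm/build/Release/bin

    # A missing parameter raises KeyError, which the site config turns into
    # a fatal "use '--param=build_mode=VALUE'" diagnostic.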
+# This is used when we can't determine the tool dir at configuration time. +try: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params +except KeyError: + e = sys.exc_info()[1] + key, = e.args + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + +# Let the main config do the real work. +lit_config.load_config(config, "@LLVM_BINARY_DIR@/utils/lit/tests/lit.cfg") diff --git a/utils/lit/tests/max-failures.py b/utils/lit/tests/max-failures.py index bc58e9a4e47f..c86d7b7fbdce 100644 --- a/utils/lit/tests/max-failures.py +++ b/utils/lit/tests/max-failures.py @@ -8,7 +8,7 @@ # # END. -# CHECK: Failing Tests (3) +# CHECK: Failing Tests (17) # CHECK: Failing Tests (1) # CHECK: Failing Tests (2) # CHECK: error: Setting --max-failures to 0 does not have any effect. diff --git a/utils/lit/tests/selecting.py b/utils/lit/tests/selecting.py index 4a0d08b860b8..25ac299d865d 100644 --- a/utils/lit/tests/selecting.py +++ b/utils/lit/tests/selecting.py @@ -1,7 +1,6 @@ # RUN: %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-BASIC %s # CHECK-BASIC: Testing: 5 tests - # Check that regex-filtering works # # RUN: %{lit} --filter 'o[a-z]e' %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER %s diff --git a/utils/lit/tests/shtest-format.py b/utils/lit/tests/shtest-format.py index 37e3e1c12629..94d74e3a9200 100644 --- a/utils/lit/tests/shtest-format.py +++ b/utils/lit/tests/shtest-format.py @@ -16,7 +16,7 @@ # CHECK-NEXT: line 2: failed test output on stdout # CHECK: Command Output (stderr): # CHECK-NEXT: -- -# CHECK-NEXT: cat: does-not-exist: No such file or directory +# CHECK-NEXT: cat{{(\.exe)?}}: does-not-exist: No such file or directory # CHECK: -- # CHECK: FAIL: shtest-format :: external_shell/fail_with_bad_encoding.txt diff --git a/utils/lit/tests/shtest-output-printing.py b/utils/lit/tests/shtest-output-printing.py index 24580b37f1f5..2a85cf975c9a 100644 --- a/utils/lit/tests/shtest-output-printing.py +++ b/utils/lit/tests/shtest-output-printing.py @@ -22,7 +22,7 @@ # CHECK-NEXT: hi # # CHECK: $ "wc" "missing-file" -# CHECK-NEXT: # redirected output from '{{.*}}/basic.txt.tmp.out': +# CHECK-NEXT: # redirected output from '{{.*(/|\\\\)}}basic.txt.tmp.out': # CHECK-NEXT: missing-file{{.*}} No such file or directory # CHECK: note: command had no output on stdout or stderr # CHECK-NEXT: error: command failed with exit status: 1 diff --git a/utils/lit/tests/shtest-shell.py b/utils/lit/tests/shtest-shell.py index 18b80cd7d087..723842fce040 100644 --- a/utils/lit/tests/shtest-shell.py +++ b/utils/lit/tests/shtest-shell.py @@ -1,12 +1,76 @@ # Check the internal shell handling component of the ShTest format. # # RUN: not %{lit} -j 1 -v %{inputs}/shtest-shell > %t.out +# FIXME: Temporarily dump test output so we can debug failing tests on +# buildbots. +# RUN: cat %t.out # RUN: FileCheck --input-file %t.out %s # # END. 
# CHECK: -- Testing: + +# CHECK: FAIL: shtest-shell :: diff-error-0.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED *** +# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt" +# CHECK: # command stderr: +# CHECK: Unsupported: 'diff' cannot be part of a pipeline +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-1.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED *** +# CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt" +# CHECK: # command stderr: +# CHECK: Unsupported: 'diff': option -B not recognized +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-2.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-2.txt' FAILED *** +# CHECK: $ "diff" "temp.txt" +# CHECK: # command stderr: +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-3.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-3.txt' FAILED *** +# CHECK: $ "diff" "temp.txt" "temp1.txt" +# CHECK: # command stderr: +# CHECK: Error: 'diff' command failed +# CHECK: error: command failed with exit status: 1 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-4.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-4.txt' FAILED *** +# CHECK: Exit Code: 1 +# CHECK: # command output: +# CHECK: diff-error-4.txt.tmp +# CHECK: diff-error-4.txt.tmp1 +# CHECK: *** 1 **** +# CHECK: ! hello-first +# CHECK: --- 1 ---- +# CHECK: ! hello-second +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-5.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-5.txt' FAILED *** +# CHECK: $ "diff" +# CHECK: # command stderr: +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-error-6.txt +# CHECK: *** TEST 'shtest-shell :: diff-error-6.txt' FAILED *** +# CHECK: $ "diff" +# CHECK: # command stderr: +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + # CHECK: FAIL: shtest-shell :: error-0.txt # CHECK: *** TEST 'shtest-shell :: error-0.txt' FAILED *** # CHECK: $ "not-a-real-command" @@ -27,7 +91,62 @@ # CHECK: Unsupported redirect: # CHECK: *** +# CHECK: FAIL: shtest-shell :: mkdir-error-0.txt +# CHECK: *** TEST 'shtest-shell :: mkdir-error-0.txt' FAILED *** +# CHECK: $ "mkdir" "-p" "temp" +# CHECK: # command stderr: +# CHECK: Unsupported: 'mkdir' cannot be part of a pipeline +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: mkdir-error-1.txt +# CHECK: *** TEST 'shtest-shell :: mkdir-error-1.txt' FAILED *** +# CHECK: $ "mkdir" "-p" "-m" "777" "temp" +# CHECK: # command stderr: +# CHECK: Unsupported: 'mkdir': option -m not recognized +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: mkdir-error-2.txt +# CHECK: *** TEST 'shtest-shell :: mkdir-error-2.txt' FAILED *** +# CHECK: $ "mkdir" "-p" +# CHECK: # command stderr: +# CHECK: Error: 'mkdir' is missing an operand +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + # CHECK: PASS: shtest-shell :: redirects.txt + +# CHECK: FAIL: shtest-shell :: rm-error-0.txt +# CHECK: *** TEST 'shtest-shell :: rm-error-0.txt' FAILED *** +# CHECK: $ "rm" "-rf" "temp" +# CHECK: # command stderr: +# CHECK: Unsupported: 'rm' cannot be part of a pipeline +# CHECK: error: command failed with exit status: 127 +# 
CHECK: *** + +# CHECK: FAIL: shtest-shell :: rm-error-1.txt +# CHECK: *** TEST 'shtest-shell :: rm-error-1.txt' FAILED *** +# CHECK: $ "rm" "-f" "-v" "temp" +# CHECK: # command stderr: +# CHECK: Unsupported: 'rm': option -v not recognized +# CHECK: error: command failed with exit status: 127 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: rm-error-2.txt +# CHECK: *** TEST 'shtest-shell :: rm-error-2.txt' FAILED *** +# CHECK: $ "rm" "-r" "hello" +# CHECK: # command stderr: +# CHECK: Error: 'rm' command failed +# CHECK: error: command failed with exit status: 1 +# CHECK: *** + +# CHECK: FAIL: shtest-shell :: rm-error-3.txt +# CHECK: *** TEST 'shtest-shell :: rm-error-3.txt' FAILED *** +# CHECK: Exit Code: 1 +# CHECK: *** + # CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt -# CHECK: Failing Tests (3) +# CHECK: PASS: shtest-shell :: valid-shell.txt +# CHECK: Failing Tests (17) diff --git a/utils/lit/tests/shtest-timeout.py b/utils/lit/tests/shtest-timeout.py index 87f431de2500..879850065908 100644 --- a/utils/lit/tests/shtest-timeout.py +++ b/utils/lit/tests/shtest-timeout.py @@ -1,5 +1,8 @@ # REQUIRES: python-psutil +# PR33944 +# XFAIL: windows + # Test per test timeout using external shell # RUN: not %{lit} \ # RUN: %{inputs}/shtest-timeout/infinite_loop.py \ diff --git a/utils/lit/tests/unit/TestRunner.py b/utils/lit/tests/unit/TestRunner.py index 79cc10f7e14d..874bf275d4ea 100644 --- a/utils/lit/tests/unit/TestRunner.py +++ b/utils/lit/tests/unit/TestRunner.py @@ -28,6 +28,7 @@ class TestIntegratedTestKeywordParser(unittest.TestCase): quiet=False, useValgrind=False, valgrindLeakCheck=False, + singleProcess=False, valgrindArgs=[], noExecute=False, debug=False, diff --git a/utils/llvm-gisel-cov.py b/utils/llvm-gisel-cov.py new file mode 100644 index 000000000000..a74ed10f8642 --- /dev/null +++ b/utils/llvm-gisel-cov.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +""" +Summarize the information in the given coverage files. + +Emits the number of rules covered or the percentage of rules covered depending +on whether --num-rules has been used to specify the total number of rules. 
+""" + +import argparse +import struct + +class FileFormatError(Exception): + pass + +def backend_int_pair(s): + backend, sep, value = s.partition('=') + if (sep is None): + raise argparse.ArgumentTypeError("'=' missing, expected name=value") + if (not backend): + raise argparse.ArgumentTypeError("Expected name=value") + if (not value): + raise argparse.ArgumentTypeError("Expected name=value") + return backend, int(value) + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('input', nargs='+') + parser.add_argument('--num-rules', type=backend_int_pair, action='append', + metavar='BACKEND=NUM', + help='Specify the number of rules for a backend') + args = parser.parse_args() + + covered_rules = {} + + for input_filename in args.input: + with open(input_filename, 'rb') as input_fh: + data = input_fh.read() + pos = 0 + while data: + backend, _, data = data.partition('\0') + pos += len(backend) + pos += 1 + + if len(backend) == 0: + raise FileFormatError() + backend, = struct.unpack("%ds" % len(backend), backend) + + while data: + if len(data) < 8: + raise FileFormatError() + rule_id, = struct.unpack("Q", data[:8]) + pos += 8 + data = data[8:] + if rule_id == (2 ** 64) - 1: + break + covered_rules[backend] = covered_rules.get(backend, {}) + covered_rules[backend][rule_id] = covered_rules[backend].get(rule_id, 0) + 1 + + num_rules = dict(args.num_rules) + for backend, rules_for_backend in covered_rules.items(): + if backend in num_rules: + print "%s: %3.2f%% of rules covered" % (backend, (float(len(rules_for_backend.keys())) / num_rules[backend]) * 100) + else: + print "%s: %d rules covered" % (backend, len(rules_for_backend.keys())) + +if __name__ == '__main__': + main() diff --git a/utils/llvm-lit/CMakeLists.txt b/utils/llvm-lit/CMakeLists.txt index 4b10354cfdea..670175c81851 100644 --- a/utils/llvm-lit/CMakeLists.txt +++ b/utils/llvm-lit/CMakeLists.txt @@ -1,12 +1,13 @@ -if (WIN32 AND NOT CYGWIN) - # llvm-lit needs suffix.py for multiprocess to find a main module. - set(suffix .py) -endif () -set(llvm_lit_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-lit${suffix}) +get_property(LLVM_LIT_CONFIG_MAP GLOBAL PROPERTY LLVM_LIT_CONFIG_MAP) + +get_llvm_lit_path(LIT_BASE_DIR LIT_FILE_NAME) + +set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) if(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") foreach(BUILD_MODE ${CMAKE_CONFIGURATION_TYPES}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${BUILD_MODE} bi ${llvm_lit_path}) + string(REPLACE ${CMAKE_CFG_INTDIR} ${BUILD_MODE} bi ${LIT_BASE_DIR}) + set(bi "${bi}/${LIT_FILE_NAME}") configure_file( llvm-lit.in ${bi} @@ -16,6 +17,6 @@ else() set(BUILD_MODE .) configure_file( llvm-lit.in - ${llvm_lit_path} + ${LIT_BASE_DIR}/${LIT_FILE_NAME} ) endif() diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in index 08a2a670b21c..de67b18e52c1 100644 --- a/utils/llvm-lit/llvm-lit.in +++ b/utils/llvm-lit/llvm-lit.in @@ -3,6 +3,15 @@ import os import sys +config_map = {} + +def map_config(source_dir, site_config): + global config_map + source_dir = os.path.realpath(source_dir) + source_dir = os.path.normcase(source_dir) + site_config = os.path.normpath(site_config) + config_map[source_dir] = site_config + # Variables configured at build time. llvm_source_root = "@LLVM_SOURCE_DIR@" llvm_obj_root = "@LLVM_BINARY_DIR@" @@ -12,42 +21,11 @@ sys.path.insert(0, os.path.join(llvm_source_root, 'utils', 'lit')) # Set up some builtin parameters, so that by default the LLVM test suite # configuration file knows how to find the object tree. 
-builtin_parameters = { - 'build_mode' : "@BUILD_MODE@", - 'llvm_site_config' : os.path.join(llvm_obj_root, 'test', 'lit.site.cfg'), - 'llvm_unit_site_config' : os.path.join(llvm_obj_root, 'test', 'Unit', - 'lit.site.cfg') - } - -clang_obj_root = os.path.join(llvm_obj_root, 'tools', 'clang') - -if os.path.exists(clang_obj_root): - builtin_parameters['clang_site_config'] = \ - os.path.join(clang_obj_root, 'test', 'lit.site.cfg') - clang_tools_extra_obj_root = os.path.join(clang_obj_root, 'tools', 'extra') - if os.path.exists(clang_tools_extra_obj_root): - builtin_parameters['clang_tools_extra_site_config'] = \ - os.path.join(clang_tools_extra_obj_root, 'test', 'lit.site.cfg') - -lld_obj_root = os.path.join(llvm_obj_root, 'tools', 'lld') -if os.path.exists(lld_obj_root): - builtin_parameters['lld_site_config'] = \ - os.path.join(lld_obj_root, 'test', 'lit.site.cfg') - -compilerrt_obj_root = os.path.join(llvm_obj_root, 'projects', 'compiler-rt') -if os.path.exists(compilerrt_obj_root): - builtin_parameters['compilerrt_site_basedir'] = \ - os.path.join(compilerrt_obj_root, 'test') - -libcxx_obj_root = os.path.join(llvm_obj_root, 'projects', 'libcxx') -if os.path.exists(libcxx_obj_root): - builtin_parameters['libcxx_site_config'] = \ - os.path.join(libcxx_obj_root, 'test', 'lit.site.cfg') - -libcxxabi_obj_root = os.path.join(llvm_obj_root, 'projects', 'libcxxabi') -if os.path.exists(libcxxabi_obj_root): - builtin_parameters['libcxxabi_site_config'] = \ - os.path.join(libcxxabi_obj_root, 'test', 'lit.site.cfg') +builtin_parameters = { 'build_mode' : "@BUILD_MODE@" } + +@LLVM_LIT_CONFIG_MAP@ + +builtin_parameters['config_map'] = config_map if __name__=='__main__': from lit.main import main diff --git a/utils/not/CMakeLists.txt b/utils/not/CMakeLists.txt index 4a92348ba0f9..29c7b0218521 100644 --- a/utils/not/CMakeLists.txt +++ b/utils/not/CMakeLists.txt @@ -2,4 +2,4 @@ add_llvm_utility(not not.cpp ) -target_link_libraries(not LLVMSupport) +target_link_libraries(not PRIVATE LLVMSupport) diff --git a/utils/not/not.cpp b/utils/not/not.cpp index 271fbcfe94ae..de71b4c68878 100644 --- a/utils/not/not.cpp +++ b/utils/not/not.cpp @@ -39,8 +39,7 @@ int main(int argc, const char **argv) { } std::string ErrMsg; - int Result = sys::ExecuteAndWait(*Program, argv, nullptr, nullptr, 0, 0, - &ErrMsg); + int Result = sys::ExecuteAndWait(*Program, argv, nullptr, {}, 0, 0, &ErrMsg); #ifdef _WIN32 // Handle abort() in msvcrt -- It has exit code as 3. abort(), aka // unreachable, should be recognized as a crash. However, some binaries use diff --git a/utils/release/build_llvm_package.bat b/utils/release/build_llvm_package.bat index 79871781211a..ef9b21ce923a 100755 --- a/utils/release/build_llvm_package.bat +++ b/utils/release/build_llvm_package.bat @@ -26,8 +26,8 @@ set python64_dir=C:\Users\%USER%\AppData\Local\Programs\Python\Python35 set revision=%1
set branch=trunk
-set package_version=5.0.0-r%revision%
-set clang_format_vs_version=5.0.0.%revision%
+set package_version=6.0.0-r%revision%
+set clang_format_vs_version=6.0.0.%revision%
set build_dir=llvm_package_%revision%
echo Branch: %branch%
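The llvm-lit.in rewrite above drops the hard-coded per-project site-config parameters in favor of a config_map: at configure time CMake presumably expands @LLVM_LIT_CONFIG_MAP@ into a series of map_config(source_dir, site_config) calls, and the resulting dictionary is handed to lit through builtin_parameters['config_map']. The sketch below only illustrates the idea: map_config mirrors the generated wrapper, but find_site_config and the example paths are hypothetical and show how such a map could be consulted, not how lit actually consults it.

import os

config_map = {}

def map_config(source_dir, site_config):
    # Same normalization as the generated llvm-lit wrapper: resolve symlinks
    # and fold case so lookups behave on Windows as well as POSIX.
    source_dir = os.path.normcase(os.path.realpath(source_dir))
    config_map[source_dir] = os.path.normpath(site_config)

def find_site_config(test_source_root):
    # Hypothetical lookup: walk up from the test's source root until a
    # directory registered via map_config() is found.
    d = os.path.normcase(os.path.realpath(test_source_root))
    while True:
        if d in config_map:
            return config_map[d]
        parent = os.path.dirname(d)
        if parent == d:
            return None
        d = parent

# Example entries standing in for what @LLVM_LIT_CONFIG_MAP@ would expand to.
map_config('/src/llvm/test', '/build/test/lit.site.cfg')
map_config('/src/llvm/utils/lit/tests', '/build/utils/lit/tests/lit.site.cfg')
print(find_site_config('/src/llvm/test/CodeGen'))  # -> /build/test/lit.site.cfg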
diff --git a/utils/sanitizers/ubsan_blacklist.txt b/utils/sanitizers/ubsan_blacklist.txt index 49975866ea41..b5bbfddceef6 100644 --- a/utils/sanitizers/ubsan_blacklist.txt +++ b/utils/sanitizers/ubsan_blacklist.txt @@ -5,3 +5,8 @@ # upcast of address with insufficient space for an object of type std::_Rb_tree_node<...> src:*bits/stl_tree.h + +# libstdc++ 4.8 creates a null reference when calling +# data() on an empty vector: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829 +src:*bits/stl_iterator.h +src:*bits/stl_vector.h diff --git a/utils/shuffle_select_fuzz_tester.py b/utils/shuffle_select_fuzz_tester.py new file mode 100644 index 000000000000..88d8d75f7b98 --- /dev/null +++ b/utils/shuffle_select_fuzz_tester.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python + +"""A shuffle-select vector fuzz tester. + +This is a python program to fuzz test the LLVM shufflevector and select +instructions. It generates a function with a random sequnece of shufflevectors +while optionally attaching it with a select instruction (regular or zero merge), +maintaining the element mapping accumulated across the function. It then +generates a main function which calls it with a different value in each element +and checks that the result matches the expected mapping. + +Take the output IR printed to stdout, compile it to an executable using whatever +set of transforms you want to test, and run the program. If it crashes, it found +a bug (an error message with the expected and actual result is printed). +""" + +import random +import uuid +import argparse + +# Possibility of one undef index in generated mask for shufflevector instruction +SHUF_UNDEF_POS = 0.15 + +# Possibility of one undef index in generated mask for select instruction +SEL_UNDEF_POS = 0.15 + +# Possibility of adding a select instruction to the result of a shufflevector +ADD_SEL_POS = 0.4 + +# If we are adding a select instruction, this is the possibility of a +# merge-select instruction (1 - MERGE_SEL_POS = possibility of zero-merge-select +# instruction. +MERGE_SEL_POS = 0.5 + + +test_template = r''' +define internal fastcc {ty} @test({inputs}) noinline nounwind {{ +entry: +{instructions} + ret {ty} {last_name} +}} +''' + +error_template = r'''@error.{lane} = private unnamed_addr global [64 x i8] c"FAIL: lane {lane}, expected {exp}, found %d\0A{padding}"''' + +main_template = r''' +define i32 @main() {{ +entry: + ; Create a scratch space to print error messages. + %str = alloca [64 x i8] + %str.ptr = getelementptr inbounds [64 x i8], [64 x i8]* %str, i32 0, i32 0 + + ; Build the input vector and call the test function. + %v = call fastcc {ty} @test({inputs}) + br label %test.0 + + {check_die} +}} + +declare i32 @strlen(i8*) +declare i32 @write(i32, i8*, i32) +declare i32 @sprintf(i8*, i8*, ...) +declare void @llvm.trap() noreturn nounwind +''' + +check_template = r''' +test.{lane}: + %v.{lane} = extractelement {ty} %v, i32 {lane} + %cmp.{lane} = {i_f}cmp {ordered}ne {scalar_ty} %v.{lane}, {exp} + br i1 %cmp.{lane}, label %die.{lane}, label %test.{n_lane} +''' + +undef_check_template = r''' +test.{lane}: +; Skip this lane, its value is undef. + br label %test.{n_lane} +''' + +die_template = r''' +die.{lane}: +; Capture the actual value and print an error message. + call i32 (i8*, i8*, ...) 
@sprintf(i8* %str.ptr, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @error.{lane}, i32 0, i32 0), {scalar_ty} %v.{lane}) + %length.{lane} = call i32 @strlen(i8* %str.ptr) + call i32 @write(i32 2, i8* %str.ptr, i32 %length.{lane}) + call void @llvm.trap() + unreachable +''' + +class Type: + def __init__(self, is_float, elt_width, elt_num): + self.is_float = is_float # Boolean + self.elt_width = elt_width # Integer + self.elt_num = elt_num # Integer + + def dump(self): + if self.is_float: + str_elt = 'float' if self.elt_width == 32 else 'double' + else: + str_elt = 'i' + str(self.elt_width) + + if self.elt_num == 1: + return str_elt + else: + return '<' + str(self.elt_num) + ' x ' + str_elt + '>' + + def get_scalar_type(self): + return Type(self.is_float, self.elt_width, 1) + + + +# Class to represent any value (variable) that can be used. +class Value: + def __init__(self, name, ty, value = None): + self.ty = ty # Type + self.name = name # String + self.value = value # list of integers or floating points + + +# Class to represent an IR instruction (shuffle/select). +class Instruction(Value): + def __init__(self, name, ty, op0, op1, mask): + Value.__init__(self, name, ty) + self.op0 = op0 # Value + self.op1 = op1 # Value + self.mask = mask # list of integers + + def dump(self): pass + + def calc_value(self): pass + + +# Class to represent an IR shuffle instruction +class ShufInstr(Instruction): + + shuf_template = ' {name} = shufflevector {ty} {op0}, {ty} {op1}, <{num} x i32> {mask}\n' + + def __init__(self, name, ty, op0, op1, mask): + Instruction.__init__(self, '%shuf' + name, ty, op0, op1, mask) + + def dump(self): + str_mask = [('i32 ' + str(idx)) if idx != -1 else 'i32 undef' for idx in self.mask] + str_mask = '<' + (', ').join(str_mask) + '>' + return self.shuf_template.format(name = self.name, ty = self.ty.dump(), op0 = self.op0.name, + op1 = self.op1.name, num = self.ty.elt_num, mask = str_mask) + + def calc_value(self): + if self.value != None: + print 'Trying to calculate the value of a shuffle instruction twice' + exit(1) + + result = [] + for i in range(len(self.mask)): + index = self.mask[i] + + if index < self.ty.elt_num and index >= 0: + result.append(self.op0.value[index]) + elif index >= self.ty.elt_num: + index = index % self.ty.elt_num + result.append(self.op1.value[index]) + else: # -1 => undef + result.append(-1) + + self.value = result + + +# Class to represent an IR select instruction +class SelectInstr(Instruction): + + sel_template = ' {name} = select <{num} x i1> {mask}, {ty} {op0}, {ty} {op1}\n' + + def __init__(self, name, ty, op0, op1, mask): + Instruction.__init__(self, '%sel' + name, ty, op0, op1, mask) + + def dump(self): + str_mask = [('i1 ' + str(idx)) if idx != -1 else 'i1 undef' for idx in self.mask] + str_mask = '<' + (', ').join(str_mask) + '>' + return self.sel_template.format(name = self.name, ty = self.ty.dump(), op0 = self.op0.name, + op1 = self.op1.name, num = self.ty.elt_num, mask = str_mask) + + def calc_value(self): + if self.value != None: + print 'Trying to calculate the value of a select instruction twice' + exit(1) + + result = [] + for i in range(len(self.mask)): + index = self.mask[i] + + if index == 1: + result.append(self.op0.value[i]) + elif index == 0: + result.append(self.op1.value[i]) + else: # -1 => undef + result.append(-1) + + self.value = result + + +# Returns a list of Values initialized with actual numbers according to the +# provided type +def gen_inputs(ty, num): + inputs = [] + for i in range(num): + inp = [] + for j in 
range(ty.elt_num): + if ty.is_float: + inp.append(float(i*ty.elt_num + j)) + else: + inp.append((i*ty.elt_num + j) % (1 << ty.elt_width)) + inputs.append(Value('%inp' + str(i), ty, inp)) + + return inputs + + +# Returns a random vector type to be tested +# In case one of the dimensions (scalar type/number of elements) is provided, +# fill the blank dimension and return appropriate Type object. +def get_random_type(ty, num_elts): + if ty != None: + if ty == 'i8': + is_float = False + width = 8 + elif ty == 'i16': + is_float = False + width = 16 + elif ty == 'i32': + is_float = False + width = 32 + elif ty == 'i64': + is_float = False + width = 64 + elif ty == 'f32': + is_float = True + width = 32 + elif ty == 'f64': + is_float = True + width = 64 + + int_elt_widths = [8, 16, 32, 64] + float_elt_widths = [32, 64] + + if num_elts == None: + num_elts = random.choice(range(2, 65)) + + if ty == None: + # 1 for integer type, 0 for floating-point + if random.randint(0,1): + is_float = False + width = random.choice(int_elt_widths) + else: + is_float = True + width = random.choice(float_elt_widths) + + return Type(is_float, width, num_elts) + + +# Generate mask for shufflevector IR instruction, with SHUF_UNDEF_POS possibility +# of one undef index. +def gen_shuf_mask(ty): + mask = [] + for i in range(ty.elt_num): + if SHUF_UNDEF_POS/ty.elt_num > random.random(): + mask.append(-1) + else: + mask.append(random.randint(0, ty.elt_num*2 - 1)) + + return mask + + +# Generate mask for select IR instruction, with SEL_UNDEF_POS possibility +# of one undef index. +def gen_sel_mask(ty): + mask = [] + for i in range(ty.elt_num): + if SEL_UNDEF_POS/ty.elt_num > random.random(): + mask.append(-1) + else: + mask.append(random.randint(0, 1)) + + return mask + +# Generate shuffle instructions with optional select instruction after. +def gen_insts(inputs, ty): + int_zero_init = Value('zeroinitializer', ty, [0]*ty.elt_num) + float_zero_init = Value('zeroinitializer', ty, [0.0]*ty.elt_num) + + insts = [] + name_idx = 0 + while len(inputs) > 1: + # Choose 2 available Values - remove them from inputs list. + [idx0, idx1] = sorted(random.sample(range(len(inputs)), 2)) + op0 = inputs[idx0] + op1 = inputs[idx1] + + # Create the shuffle instruction. + shuf_mask = gen_shuf_mask(ty) + shuf_inst = ShufInstr(str(name_idx), ty, op0, op1, shuf_mask) + shuf_inst.calc_value() + + # Add the new shuffle instruction to the list of instructions. + insts.append(shuf_inst) + + # Optionally, add select instruction with the result of the previous shuffle. + if random.random() < ADD_SEL_POS: + # Either blending with a random Value or with an all-zero vector. + if random.random() < MERGE_SEL_POS: + op2 = random.choice(inputs) + else: + op2 = float_zero_init if ty.is_float else int_zero_init + + select_mask = gen_sel_mask(ty) + select_inst = SelectInstr(str(name_idx), ty, shuf_inst, op2, select_mask) + select_inst.calc_value() + + # Add the select instructions to the list of instructions and to the available Values. + insts.append(select_inst) + inputs.append(select_inst) + else: + # If the shuffle instruction is not followed by select, add it to the available Values. 
+ inputs.append(shuf_inst) + + del inputs[idx1] + del inputs[idx0] + name_idx += 1 + + return insts + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--seed', default=str(uuid.uuid4()), + help='A string used to seed the RNG') + parser.add_argument('--max-num-inputs', type=int, default=20, + help='Specify the maximum number of vector inputs for the test. (default: 20)') + parser.add_argument('--min-num-inputs', type=int, default=10, + help='Specify the minimum number of vector inputs for the test. (default: 10)') + parser.add_argument('--type', default=None, + help=''' + Choose specific type to be tested. + i8, i16, i32, i64, f32 or f64. + (default: random)''') + parser.add_argument('--num-elts', default=None, type=int, + help='Choose specific number of vector elements to be tested. (default: random)') + args = parser.parse_args() + + print '; The seed used for this test is ' + args.seed + + assert args.min_num_inputs < args.max_num_inputs , "Minimum value greater than maximum." + assert args.type in [None, 'i8', 'i16', 'i32', 'i64', 'f32', 'f64'], "Illegal type." + assert args.num_elts == None or args.num_elts > 0, "num_elts must be a positive integer." + + random.seed(args.seed) + ty = get_random_type(args.type, args.num_elts) + inputs = gen_inputs(ty, random.randint(args.min_num_inputs, args.max_num_inputs)) + inputs_str = (', ').join([inp.ty.dump() + ' ' + inp.name for inp in inputs]) + inputs_values = [inp.value for inp in inputs] + + insts = gen_insts(inputs, ty) + + assert len(inputs) == 1, "Only one value should be left after generating phase" + res = inputs[0] + + # print the actual test function by dumping the generated instructions. + insts_str = ''.join([inst.dump() for inst in insts]) + print test_template.format(ty = ty.dump(), inputs = inputs_str, + instructions = insts_str, last_name = res.name) + + # Print the error message templates as global strings + for i in range(len(res.value)): + pad = ''.join(['\\00']*(31 - len(str(i)) - len(str(res.value[i])))) + print error_template.format(lane = str(i), exp = str(res.value[i]), + padding = pad) + + # Prepare the runtime checks and failure handlers. + scalar_ty = ty.get_scalar_type() + check_die = '' + i_f = 'f' if ty.is_float else 'i' + ordered = 'o' if ty.is_float else '' + for i in range(len(res.value)): + if res.value[i] != -1: + # Emit runtime check for each non-undef expected value. + check_die += check_template.format(lane = str(i), n_lane = str(i+1), + ty = ty.dump(), i_f = i_f, scalar_ty = scalar_ty.dump(), + exp = str(res.value[i]), ordered = ordered) + # Emit failure handler for each runtime check with proper error message + check_die += die_template.format(lane = str(i), scalar_ty = scalar_ty.dump()) + else: + # Ignore lanes with undef result + check_die += undef_check_template.format(lane = str(i), n_lane = str(i+1)) + + check_die += '\ntest.' + str(len(res.value)) + ':\n' + check_die += ' ret i32 0' + + # Prepare the input values passed to the test function. 
+ inputs_values = [', '.join([scalar_ty.dump() + ' ' + str(i) for i in inp]) for inp in inputs_values] + inputs = ', '.join([ty.dump() + ' <' + inp + '>' for inp in inputs_values]) + + print main_template.format(ty = ty.dump(), inputs = inputs, check_die = check_die) + + +if __name__ == '__main__': + main() + + diff --git a/utils/test_debuginfo.pl b/utils/test_debuginfo.pl deleted file mode 100755 index aaf90d95468c..000000000000 --- a/utils/test_debuginfo.pl +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/perl -# -# This script tests debugging information generated by a compiler. -# Input arguments -# - Input source program. Usually this source file is decorated using -# special comments to communicate debugger commands. -# - Executable file. This file is generated by the compiler. -# -# This perl script extracts debugger commands from input source program -# comments in a script. A debugger is used to load the executable file -# and run the script generated from source program comments. Finally, -# the debugger output is checked, using FileCheck, to validate -# debugging information. -# -# On Darwin the default is to use the llgdb.py wrapper script which -# translates gdb commands into their lldb equivalents. - -use File::Basename; -use Config; -use Cwd; - -my $testcase_file = $ARGV[0]; -my $executable_file = $ARGV[1]; - -my $input_filename = basename $testcase_file; -my $output_dir = dirname $executable_file; - -my $debugger_script_file = "$output_dir/$input_filename.debugger.script"; -my $output_file = "$output_dir/$input_filename.gdb.output"; - -my %cmd_map = (); -# Assume lldb to be the debugger on Darwin. -my $use_lldb = 0; -$use_lldb = 1 if ($Config{osname} eq "darwin"); - -# Extract debugger commands from testcase. They are marked with DEBUGGER: -# at the beginning of a comment line. -open(INPUT, $testcase_file); -open(OUTPUT, ">$debugger_script_file"); -while(<INPUT>) { - my($line) = $_; - $i = index($line, "DEBUGGER:"); - if ( $i >= 0) { - $l = length("DEBUGGER:"); - $s = substr($line, $i + $l); - print OUTPUT "$s"; - } -} -print OUTPUT "\n"; -print OUTPUT "quit\n"; -close(INPUT); -close(OUTPUT); - -# setup debugger and debugger options to run a script. -my $my_debugger = $ENV{'DEBUGGER'}; -if (!$my_debugger) { - if ($use_lldb) { - my $path = dirname(Cwd::abs_path($0)); - $my_debugger = "/usr/bin/env python $path/../tools/clang/test/debuginfo-tests/llgdb.py"; - } else { - $my_debugger = "gdb"; - } -} - -# quiet / exit after cmdline / no init file / execute script -my $debugger_options = "-q -batch -n -x"; - -# run debugger and capture output. -system("$my_debugger $debugger_options $debugger_script_file $executable_file > $output_file 2>&1"); - -# validate output. -system("FileCheck", "-input-file", "$output_file", "$testcase_file"); -if ($?>>8 == 1) { - print "Debugger output was:\n"; - system("cat", "$output_file"); - exit 1; -} -else { - exit 0; -} diff --git a/utils/unittest/CMakeLists.txt b/utils/unittest/CMakeLists.txt index b42ac834e3a7..5b5cbf5d745f 100644 --- a/utils/unittest/CMakeLists.txt +++ b/utils/unittest/CMakeLists.txt @@ -19,6 +19,11 @@ include_directories( googlemock ) +# LLVM requires C++11 but gtest doesn't correctly detect the availability +# of C++11 on MSVC, so we force it on. 
+add_definitions(-DGTEST_LANG_CXX11=1) +add_definitions(-DGTEST_HAS_TR1_TUPLE=0) + if(WIN32) add_definitions(-DGTEST_OS_WINDOWS=1) endif() diff --git a/utils/update_llc_test_checks.py b/utils/update_llc_test_checks.py index 3b3ff74d8633..57d6e578259c 100755 --- a/utils/update_llc_test_checks.py +++ b/utils/update_llc_test_checks.py @@ -26,38 +26,35 @@ def llc(args, cmd_args, ir): # RegEx: this is where the magic happens. -SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) -SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) -SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') -SCRUB_LOOP_COMMENT_RE = re.compile( - r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) - ASM_FUNCTION_X86_RE = re.compile( r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*' - r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)', + r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)', flags=(re.M | re.S)) -SCRUB_X86_SHUFFLES_RE = ( - re.compile( - r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', - flags=re.M)) -SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)') -SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)') -SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+') ASM_FUNCTION_ARM_RE = re.compile( r'^(?P<func>[0-9a-zA-Z_]+):\n' # f: (name of function) r'\s+\.fnstart\n' # .fnstart r'(?P<body>.*?)\n' # (body of the function) - r'.Lfunc_end[0-9]+:\n', # .Lfunc_end0: + r'.Lfunc_end[0-9]+:', # .Lfunc_end0: or # -- End function flags=(re.M | re.S)) -RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$') -TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)') -TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$') -IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(') -CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)') -CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') +ASM_FUNCTION_AARCH64_RE = re.compile( + r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n' + r'[ \t]+.cfi_startproc\n' + r'(?P<body>.*?)\n' + # This list is incomplete + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + +ASM_FUNCTION_MIPS_RE = re.compile( + r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func) + r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+' # Mips+LLVM standard asm prologue + r'(?P<body>.*?)\n' # (body of the function) + r'(?:^[ \t]+\.(set|end).*?\n)+' # Mips+LLVM standard asm epilogue + r'(\$|\.L)func_end[0-9]+:\n', # $func_end0: (mips32 - O32) or + # .Lfunc_end0: (mips64 - NewABI) + flags=(re.M | re.S)) ASM_FUNCTION_PPC_RE = re.compile( r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n' @@ -70,6 +67,12 @@ ASM_FUNCTION_PPC_RE = re.compile( r'.Lfunc_end[0-9]+:\n', flags=(re.M | re.S)) +ASM_FUNCTION_RISCV_RE = re.compile( + r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' 
+ r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*' + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + ASM_FUNCTION_SYSTEMZ_RE = re.compile( r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n' r'[ \t]+.cfi_startproc\n' @@ -78,7 +81,29 @@ ASM_FUNCTION_SYSTEMZ_RE = re.compile( flags=(re.M | re.S)) -def scrub_asm_x86(asm): +SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) +SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) +SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') +SCRUB_LOOP_COMMENT_RE = re.compile( + r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) + +SCRUB_X86_SHUFFLES_RE = ( + re.compile( + r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', + flags=re.M)) +SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)') +SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)') +SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+') +SCRUB_X86_RET_RE = re.compile(r'ret[l|q]') + +RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$') +TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)') +TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$') +IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(') +CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)') +CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') + +def scrub_asm_x86(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) @@ -92,13 +117,16 @@ def scrub_asm_x86(asm): asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm) # Generically match a LCP symbol. asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm) + if args.x86_extra_scrub: + # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'. + asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm) # Strip kill operands inserted into the asm. asm = SCRUB_KILL_COMMENT_RE.sub('', asm) # Strip trailing whitespace. asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) return asm -def scrub_asm_arm_eabi(asm): +def scrub_asm_arm_eabi(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) @@ -110,7 +138,7 @@ def scrub_asm_arm_eabi(asm): asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) return asm -def scrub_asm_powerpc64le(asm): +def scrub_asm_powerpc64(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) @@ -122,7 +150,27 @@ def scrub_asm_powerpc64le(asm): asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) return asm -def scrub_asm_systemz(asm): +def scrub_asm_mips(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. + asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + +def scrub_asm_riscv(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. + asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + +def scrub_asm_systemz(asm, args): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. 
asm = SCRUB_WHITESPACE_RE.sub(r' ', asm) @@ -135,17 +183,39 @@ def scrub_asm_systemz(asm): # Build up a dictionary of all the function bodies. def build_function_body_dictionary(raw_tool_output, triple, prefixes, func_dict, - verbose): + args): target_handlers = { 'x86_64': (scrub_asm_x86, ASM_FUNCTION_X86_RE), 'i686': (scrub_asm_x86, ASM_FUNCTION_X86_RE), 'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE), 'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE), + 'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE), 'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), 'thumb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6t2': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6t2-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv6m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv7m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv7m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), 'thumbv8-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv8m.base': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'thumbv8m.main': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'armv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'armv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'armv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), 'armeb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), - 'powerpc64le': (scrub_asm_powerpc64le, ASM_FUNCTION_PPC_RE), + 'armv7eb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'armv7eb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE), + 'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE), + 'powerpc64': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE), + 'powerpc64le': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE), + 'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), + 'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE), 's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE), } handlers = None @@ -161,11 +231,11 @@ def build_function_body_dictionary(raw_tool_output, triple, prefixes, func_dict, if not m: continue func = m.group('func') - scrubbed_body = scrubber(m.group('body')) + scrubbed_body = scrubber(m.group('body'), args) if func.startswith('stress'): # We only use the last line of the function body for stress tests. scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:]) - if verbose: + if args.verbose: print >>sys.stderr, 'Processing function: ' + func for l in scrubbed_body.splitlines(): print >>sys.stderr, ' ' + l @@ -228,6 +298,9 @@ def main(): help='The "llc" binary to use to generate the test case') parser.add_argument( '--function', help='The function in the test file to update') + parser.add_argument( + '--x86_extra_scrub', action='store_true', + help='Use more regex for x86 matching to reduce diffs between various subtargets') parser.add_argument('tests', nargs='+') args = parser.parse_args() @@ -309,7 +382,7 @@ def main(): print >>sys.stderr, "Cannot find a triple. 
Assume 'x86'" build_function_body_dictionary(raw_tool_output, - triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict, args.verbose) + triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict, args) is_in_function = False is_in_function_start = False diff --git a/utils/update_mir_test_checks.py b/utils/update_mir_test_checks.py new file mode 100755 index 000000000000..2934f09f6b37 --- /dev/null +++ b/utils/update_mir_test_checks.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python + +"""Updates FileCheck checks in MIR tests. + +This script is a utility to update MIR based tests with new FileCheck +patterns. + +The checks added by this script will cover the entire body of each +function it handles. Virtual registers used are given names via +FileCheck patterns, so if you do want to check a subset of the body it +should be straightforward to trim out the irrelevant parts. None of +the YAML metadata will be checked, other than function names. + +If there are multiple llc commands in a test, the full set of checks +will be repeated for each different check pattern. Checks for patterns +that are common between different commands will be left as-is by +default, or removed if the --remove-common-prefixes flag is provided. +""" + +from __future__ import print_function + +import argparse +import collections +import os +import re +import subprocess +import sys + +RUN_LINE_RE = re.compile('^\s*[;#]\s*RUN:\s*(.*)$') +TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)') +MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)') +TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') +CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)') +CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') + +FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)') +BODY_BEGIN_RE = re.compile(r' *body: *\|') +BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$') +VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?') +VREG_DEF_RE = re.compile( + r'^ *(?P<vregs>{0}(?:, {0})*) ' + r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern)) +PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)') +VREG_CLASS_RE = re.compile(r'^ *- *{ id: ([0-9]+), class: ([a-z0-9_]+)', re.M) + +MIR_FUNC_RE = re.compile( + r'^---$' + r'\n' + r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$' + r'(?:.*?(?P<vregs>^ *registers: *(?:\n *- {[^\n]+$)*))?' + r'.*?' + r'^ *body: *\|\n' + r'(?P<body>.*?)\n' + r'^\.\.\.$', + flags=(re.M | re.S)) + +class LLC: + def __init__(self, bin): + self.bin = bin + + def __call__(self, args, ir): + if ir.endswith('.mir'): + args = '{} -x mir'.format(args) + with open(ir) as ir_file: + stdout = subprocess.check_output('{} {}'.format(self.bin, args), + shell=True, stdin=ir_file) + # Fix line endings to unix CR style. 
+ stdout = stdout.replace('\r\n', '\n') + return stdout + + +class Run: + def __init__(self, prefixes, cmd_args, triple): + self.prefixes = prefixes + self.cmd_args = cmd_args + self.triple = triple + + def __getitem__(self, index): + return [self.prefixes, self.cmd_args, self.triple][index] + + +def log(msg, verbose=True): + if verbose: + print(msg, file=sys.stderr) + + +def warn(msg, test_file=None): + if test_file: + msg = '{}: {}'.format(test_file, msg) + print('WARNING: {}'.format(msg), file=sys.stderr) + + +def find_triple_in_ir(lines, verbose=False): + for l in lines: + m = TRIPLE_IR_RE.match(l) + if m: + return m.group(1) + return None + + +def find_run_lines(test, lines, verbose=False): + raw_lines = [m.group(1) + for m in [RUN_LINE_RE.match(l) for l in lines] if m] + run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] + for l in raw_lines[1:]: + if run_lines[-1].endswith("\\"): + run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l + else: + run_lines.append(l) + if verbose: + log('Found {} RUN lines:'.format(len(run_lines))) + for l in run_lines: + log(' RUN: {}'.format(l)) + return run_lines + + +def build_run_list(test, run_lines, verbose=False): + run_list = [] + all_prefixes = [] + for l in run_lines: + commands = [cmd.strip() for cmd in l.split('|', 1)] + llc_cmd = commands[0] + filecheck_cmd = commands[1] if len(commands) > 1 else '' + + if not llc_cmd.startswith('llc '): + warn('Skipping non-llc RUN line: {}'.format(l), test_file=test) + continue + if not filecheck_cmd.startswith('FileCheck '): + warn('Skipping non-FileChecked RUN line: {}'.format(l), + test_file=test) + continue + + triple = None + m = TRIPLE_ARG_RE.search(llc_cmd) + if m: + triple = m.group(1) + # If we find -march but not -mtriple, use that. + m = MARCH_ARG_RE.search(llc_cmd) + if m and not triple: + triple = '{}--'.format(m.group(1)) + + cmd_args = llc_cmd[len('llc'):].strip() + cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip() + + check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd) + for item in m.group(1).split(',')] + if not check_prefixes: + check_prefixes = ['CHECK'] + all_prefixes += check_prefixes + + run_list.append(Run(check_prefixes, cmd_args, triple)) + + # Remove any common prefixes. We'll just leave those entirely alone. 
+ common_prefixes = set([prefix for prefix in all_prefixes + if all_prefixes.count(prefix) > 1]) + for run in run_list: + run.prefixes = [p for p in run.prefixes if p not in common_prefixes] + + return run_list, common_prefixes + + +def find_functions_with_one_bb(lines, verbose=False): + result = [] + cur_func = None + bbs = 0 + for line in lines: + m = FUNC_NAME_RE.match(line) + if m: + if bbs == 1: + result.append(cur_func) + cur_func = m.group('func') + bbs = 0 + m = BASIC_BLOCK_RE.match(line) + if m: + bbs += 1 + if bbs == 1: + result.append(cur_func) + return result + + +def build_function_body_dictionary(test, raw_tool_output, triple, prefixes, + func_dict, verbose): + for m in MIR_FUNC_RE.finditer(raw_tool_output): + func = m.group('func') + body = m.group('body') + if verbose: + log('Processing function: {}'.format(func)) + for l in body.splitlines(): + log(' {}'.format(l)) + for prefix in prefixes: + if func in func_dict[prefix] and func_dict[prefix][func] != body: + warn('Found conflicting asm for prefix: {}'.format(prefix), + test_file=test) + func_dict[prefix][func] = body + func_dict[prefix]['{}:vregs'.format(func)] = m.group('vregs') + + +def add_checks_for_function(test, output_lines, run_list, func_dict, func_name, + add_vreg_checks, single_bb, verbose=False): + printed_prefixes = set() + for run in run_list: + for prefix in run.prefixes: + if prefix in printed_prefixes: + continue + if not func_dict[prefix][func_name]: + continue + # if printed_prefixes: + # # Add some space between different check prefixes. + # output_lines.append('') + printed_prefixes.add(prefix) + log('Adding {} lines for {}'.format(prefix, func_name), verbose) + vregs = None + if add_vreg_checks: + vregs = func_dict[prefix]['{}:vregs'.format(func_name)] + add_check_lines(test, output_lines, prefix, func_name, single_bb, + func_dict[prefix][func_name].splitlines(), vregs) + break + return output_lines + + +def add_check_lines(test, output_lines, prefix, func_name, single_bb, + func_body, vreg_data): + if single_bb: + # Don't bother checking the basic block label for a single BB + func_body.pop(0) + + if not func_body: + warn('Function has no instructions to check: {}'.format(func_name), + test_file=test) + return + + first_line = func_body[0] + indent = len(first_line) - len(first_line.lstrip(' ')) + # A check comment, indented the appropriate amount + check = '{:>{}}; {}'.format('', indent, prefix) + + output_lines.append('{}-LABEL: name: {}'.format(check, func_name)) + + if vreg_data: + output_lines.append('{}: registers:'.format(check)) + for m in VREG_CLASS_RE.finditer(vreg_data): + output_lines.append('{}-NEXT: id: {}, class: {}'.format( + check, m.group(1), m.group(2))) + + vreg_map = {} + for func_line in func_body: + if not func_line.strip(): + continue + m = VREG_DEF_RE.match(func_line) + if m: + for vreg in VREG_RE.finditer(m.group('vregs')): + name = mangle_vreg(m.group('opcode'), vreg_map.values()) + vreg_map[vreg.group(1)] = name + func_line = func_line.replace( + vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1) + for number, name in vreg_map.items(): + func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name), + func_line) + check_line = '{}: {}'.format(check, func_line[indent:]).rstrip() + output_lines.append(check_line) + + +def mangle_vreg(opcode, current_names): + base = opcode + # Simplify some common prefixes and suffixes + if opcode.startswith('G_'): + base = base[len('G_'):] + if opcode.endswith('_PSEUDO'): + base = base[:len('_PSEUDO')] + # Shorten some common opcodes with 
long-ish names + base = dict(IMPLICIT_DEF='DEF', + GLOBAL_VALUE='GV', + CONSTANT='C', + FCONSTANT='C', + MERGE_VALUES='MV', + UNMERGE_VALUES='UV', + INTRINSIC='INT', + INTRINSIC_W_SIDE_EFFECTS='INT', + INSERT_VECTOR_ELT='IVEC', + EXTRACT_VECTOR_ELT='EVEC', + SHUFFLE_VECTOR='SHUF').get(base, base) + # Avoid ambiguity when opcodes end in numbers + if len(base.rstrip('0123456789')) < len(base): + base += '_' + + i = 0 + for name in current_names: + if name.rstrip('0123456789') == base: + i += 1 + if i: + return '{}{}'.format(base, i) + return base + + +def should_add_line_to_output(input_line, prefix_set): + # Skip any check lines that we're handling. + m = CHECK_RE.match(input_line) + if m and m.group(1) in prefix_set: + return False + return True + + +def update_test_file(llc, test, remove_common_prefixes=False, + add_vreg_checks=False, verbose=False): + log('Scanning for RUN lines in test file: {}'.format(test), verbose) + with open(test) as fd: + input_lines = [l.rstrip() for l in fd] + + triple_in_ir = find_triple_in_ir(input_lines, verbose) + run_lines = find_run_lines(test, input_lines, verbose) + run_list, common_prefixes = build_run_list(test, run_lines, verbose) + + simple_functions = find_functions_with_one_bb(input_lines, verbose) + + func_dict = {} + for run in run_list: + for prefix in run.prefixes: + func_dict.update({prefix: dict()}) + for prefixes, llc_args, triple_in_cmd in run_list: + log('Extracted LLC cmd: llc {}'.format(llc_args), verbose) + log('Extracted FileCheck prefixes: {}'.format(prefixes), verbose) + + raw_tool_output = llc(llc_args, test) + if not triple_in_cmd and not triple_in_ir: + warn('No triple found: skipping file', test_file=test) + return + + build_function_body_dictionary(test, raw_tool_output, + triple_in_cmd or triple_in_ir, + prefixes, func_dict, verbose) + + state = 'toplevel' + func_name = None + prefix_set = set([prefix for run in run_list for prefix in run.prefixes]) + log('Rewriting FileCheck prefixes: {}'.format(prefix_set), verbose) + + if remove_common_prefixes: + prefix_set.update(common_prefixes) + elif common_prefixes: + warn('Ignoring common prefixes: {}'.format(common_prefixes), + test_file=test) + + autogenerated_note = ('# NOTE: Assertions have been autogenerated by ' + 'utils/{}'.format(os.path.basename(__file__))) + output_lines = [] + output_lines.append(autogenerated_note) + + for input_line in input_lines: + if input_line == autogenerated_note: + continue + + if state == 'toplevel': + if input_line.strip() == '---': + state = 'document' + output_lines.append(input_line) + elif state == 'document': + m = FUNC_NAME_RE.match(input_line) + if m: + state = 'function metadata' + func_name = m.group('func') + if input_line.strip() == '...': + state = 'toplevel' + func_name = None + if should_add_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == 'function metadata': + if should_add_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + m = BODY_BEGIN_RE.match(input_line) + if m: + if func_name in simple_functions: + # If there's only one block, put the checks inside it + state = 'function prefix' + continue + state = 'function body' + add_checks_for_function(test, output_lines, run_list, + func_dict, func_name, add_vreg_checks, + single_bb=False, verbose=verbose) + elif state == 'function prefix': + m = PREFIX_DATA_RE.match(input_line) + if not m: + state = 'function body' + add_checks_for_function(test, output_lines, run_list, + func_dict, func_name, add_vreg_checks, + 
single_bb=True, verbose=verbose) + + if should_add_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + elif state == 'function body': + if input_line.strip() == '...': + state = 'toplevel' + func_name = None + if should_add_line_to_output(input_line, prefix_set): + output_lines.append(input_line) + + log('Writing {} lines to {}...'.format(len(output_lines), test), verbose) + + with open(test, 'wb') as fd: + fd.writelines([l + '\n' for l in output_lines]) + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('-v', '--verbose', action='store_true', + help='Show verbose output') + parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC, + help='The "llc" binary to generate the test case with') + parser.add_argument('--remove-common-prefixes', action='store_true', + help='Remove existing check lines whose prefixes are ' + 'shared between multiple commands') + parser.add_argument('--add-vreg-checks', action='store_true', + help='Add checks for the "registers:" block') + parser.add_argument('tests', nargs='+') + args = parser.parse_args() + + for test in args.tests: + try: + update_test_file(args.llc, test, args.remove_common_prefixes, + args.add_vreg_checks, verbose=args.verbose) + except Exception: + warn('Error processing file', test_file=test) + raise + + +if __name__ == '__main__': + main() diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim index e110da4329b5..42a4cf3cf495 100644 --- a/utils/vim/syntax/llvm.vim +++ b/utils/vim/syntax/llvm.vim @@ -1,7 +1,7 @@ " Vim syntax file " Language: llvm " Maintainer: The LLVM team, http://llvm.org/ -" Version: $Revision: 308208 $ +" Version: $Revision: 310885 $ if version < 600 syntax clear @@ -144,6 +144,7 @@ syn keyword llvmKeyword \ ssp \ sspreq \ sspstrong + \ strictfp \ swiftcc \ tail \ target diff --git a/utils/yaml-bench/CMakeLists.txt b/utils/yaml-bench/CMakeLists.txt index 403182ceee2a..cd04b33d0a29 100644 --- a/utils/yaml-bench/CMakeLists.txt +++ b/utils/yaml-bench/CMakeLists.txt @@ -2,4 +2,4 @@ add_llvm_utility(yaml-bench YAMLBench.cpp ) -target_link_libraries(yaml-bench LLVMSupport) +target_link_libraries(yaml-bench PRIVATE LLVMSupport) |
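Among the update_llc_test_checks.py changes above is a new --x86_extra_scrub mode whose in-source comment explains that it avoids generating different checks for 32- and 64-bit runs because of 'retl' versus 'retq'. A minimal standalone sketch of just that substitution follows; it reuses the regex from the patch but is not the script itself, and the sample strings are made up.

import re

# Pattern taken from the patch; note the character class would also match a
# literal '|', which does not matter for this use.
SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')

def scrub_ret(asm):
    # Rewrite retl/retq into a FileCheck regex so one CHECK line can cover
    # both 32-bit and 64-bit output.
    return SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)

print(scrub_ret('  retq'))  # prints '  ret{{[l|q]}}'
print(scrub_ret('  retl'))  # prints '  ret{{[l|q]}}'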
