153 files changed, 10842 insertions, 4162 deletions
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 7db97301637d..b9cd99e8ffd5 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -21,14 +21,14 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Regex.h"
-#include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cctype>
+#include <list>
 #include <map>
 #include <string>
 #include <system_error>
@@ -83,6 +83,28 @@ static cl::opt<bool> EnableVarScope(
              "do not start with '$' will be reset at the beginning of\n"
              "each CHECK-LABEL block."));
 
+static cl::opt<bool> AllowDeprecatedDagOverlap(
+    "allow-deprecated-dag-overlap", cl::init(false),
+    cl::desc("Enable overlapping among matches in a group of consecutive\n"
+             "CHECK-DAG directives.  This option is deprecated and is only\n"
+             "provided for convenience as old tests are migrated to the new\n"
+             "non-overlapping CHECK-DAG implementation.\n"));
+
+static cl::opt<bool> Verbose("v", cl::init(false),
+                             cl::desc("Print directive pattern matches.\n"));
+
+static cl::opt<bool> VerboseVerbose(
+    "vv", cl::init(false),
+    cl::desc("Print information helpful in diagnosing internal FileCheck\n"
+             "issues.  Implies -v.\n"));
+static const char *const DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE";
+// Defaults to on whenever the environment variable above is set to any value.
+static cl::opt<bool> DumpInputOnFailure(
+    "dump-input-on-failure", cl::init(std::getenv(DumpInputEnv) != nullptr),
+    cl::desc("Dump original input to stderr before failing.\n"
+             "The value can be also controlled using\n"
+             "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n"));
+
 typedef cl::list<std::string>::const_iterator prefix_iterator;
 
 //===----------------------------------------------------------------------===//
@@ -98,6 +120,7 @@ enum CheckType {
   CheckNot,
   CheckDAG,
   CheckLabel,
+  CheckEmpty,
 
   /// Indicates the pattern only matches the end of file. This is used for
   /// trailing CHECK-NOTs.
@@ -146,8 +169,11 @@ public:
                     unsigned LineNumber);
   size_t Match(StringRef Buffer, size_t &MatchLen,
                StringMap<StringRef> &VariableTable) const;
-  void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
-                        const StringMap<StringRef> &VariableTable) const;
+  void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+                         const StringMap<StringRef> &VariableTable,
+                         SMRange MatchRange = None) const;
+  void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
+                       const StringMap<StringRef> &VariableTable) const;
 
   bool hasVariable() const {
     return !(VariableUses.empty() && VariableDefs.empty());
@@ -185,12 +211,25 @@ bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
       PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
 
   // Check that there is something on the line.
-  if (PatternStr.empty()) {
+  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                     "found empty check string with prefix '" + Prefix + ":'");
     return true;
   }
 
+  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
+    SM.PrintMessage(
+        PatternLoc, SourceMgr::DK_Error,
+        "found non-empty check string for empty check with prefix '" + Prefix +
+            ":'");
+    return true;
+  }
+
+  if (CheckTy == Check::CheckEmpty) {
+    RegExStr = "(\n$)";
+    return false;
+  }
+
   // Check to see if this is a fixed string, or if it has regex pieces.
   if (!MatchFullLinesHere &&
       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
@@ -463,8 +502,12 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
   }
 
-  MatchLen = FullMatch.size();
-  return FullMatch.data() - Buffer.data();
+  // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
+  // the required preceding newline, which is consumed by the pattern in the
+  // case of CHECK-EMPTY but not CHECK-NEXT.
+  size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
+  MatchLen = FullMatch.size() - MatchStartSkip;
+  return FullMatch.data() - Buffer.data() + MatchStartSkip;
 }
 
 
@@ -490,11 +533,9 @@ Pattern::ComputeMatchDistance(StringRef Buffer,
   return BufferPrefix.edit_distance(ExampleString);
 }
 
-/// Prints additional information about a failure to match involving this
-/// pattern.
-void Pattern::PrintFailureInfo(
-    const SourceMgr &SM, StringRef Buffer,
-    const StringMap<StringRef> &VariableTable) const {
+void Pattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
+                                const StringMap<StringRef> &VariableTable,
+                                SMRange MatchRange) const {
   // If this was a regular expression using variables, print the current
   // variable values.
   if (!VariableUses.empty()) {
@@ -526,11 +567,19 @@ void Pattern::PrintFailureInfo(
         }
       }
 
-      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
-                      OS.str());
+      if (MatchRange.isValid())
+        SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
+                        {MatchRange});
+      else
+        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
+                        SourceMgr::DK_Note, OS.str());
     }
   }
+}
 
+void Pattern::PrintFuzzyMatch(
+    const SourceMgr &SM, StringRef Buffer,
+    const StringMap<StringRef> &VariableTable) const {
   // Attempt to find the closest/best fuzzy match.  Usually an error happens
   // because some string in the output didn't exactly match. In these cases, we
   // would like to show the user a best guess at what "should have" matched, to
@@ -710,6 +759,9 @@ static size_t CheckTypeSize(Check::CheckType Ty) {
   case Check::CheckLabel:
     return sizeof("-LABEL:") - 1;
 
+  case Check::CheckEmpty:
+    return sizeof("-EMPTY:") - 1;
+
   case Check::CheckEOF:
     llvm_unreachable("Should not be using EOF size");
   }
@@ -717,7 +769,37 @@ static size_t CheckTypeSize(Check::CheckType Ty) {
   llvm_unreachable("Bad check type");
 }
 
+/// Return the name of directive type Ty for diagnostics, spelled with Prefix.
+static std::string CheckTypeName(StringRef Prefix, Check::CheckType Ty) {
+  switch (Ty) {
+  case Check::CheckNone:
+    return "invalid";
+  case Check::CheckPlain:
+    return Prefix;
+  case Check::CheckNext:
+    return Prefix.str() + "-NEXT";
+  case Check::CheckSame:
+    return Prefix.str() + "-SAME";
+  case Check::CheckNot:
+    return Prefix.str() + "-NOT";
+  case Check::CheckDAG:
+    return Prefix.str() + "-DAG";
+  case Check::CheckLabel:
+    return Prefix.str() + "-LABEL";
+  case Check::CheckEmpty:
+    return Prefix.str() + "-EMPTY";
+  case Check::CheckEOF:
+    return "implicit EOF";
+  case Check::CheckBadNot:
+    return "bad NOT";
+  }
+  llvm_unreachable("unknown CheckType");
+}
+
 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
+  if (Buffer.size() <= Prefix.size())
+    return Check::CheckNone;
+
   char NextChar = Buffer[Prefix.size()];
 
   // Verify that the : is present after the prefix.
@@ -743,10 +825,14 @@ static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
   if (Rest.startswith("LABEL:"))
     return Check::CheckLabel;
 
+  if (Rest.startswith("EMPTY:"))
+    return Check::CheckEmpty;
+
   // You can't combine -NOT with another suffix.
   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
-      Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
+      Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
+      Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
     return Check::CheckBadNot;
 
   return Check::CheckNone;
@@ -906,10 +992,13 @@ static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
 
     Buffer = Buffer.substr(EOL);
 
-    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
-    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
+    // Verify that CHECK-NEXT/SAME/EMPTY lines follow at least one CHECK line.
+    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
+         CheckTy == Check::CheckEmpty) &&
         CheckStrings.empty()) {
-      StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
+      StringRef Type = CheckTy == Check::CheckNext
+                           ? "NEXT"
+                           : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
                       SourceMgr::DK_Error,
                       "found '" + UsedPrefix + "-" + Type +
@@ -956,12 +1045,49 @@ static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
   return false;
 }
 
-static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
-                             StringRef Buffer,
-                             StringMap<StringRef> &VariableTable) {
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+                       StringRef Prefix, SMLoc Loc, const Pattern &Pat,
+                       StringRef Buffer, StringMap<StringRef> &VariableTable,
+                       size_t MatchPos, size_t MatchLen) {
+  if (ExpectedMatch) {
+    if (!Verbose)
+      return; // Expected matches are only reported when -v is in effect.
+    if (!VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
+      return; // The implicit EOF match is only reported when -vv is given.
+  }
+  SMLoc MatchStart = SMLoc::getFromPointer(Buffer.data() + MatchPos);
+  SMLoc MatchEnd = SMLoc::getFromPointer(Buffer.data() + MatchPos + MatchLen);
+  SMRange MatchRange(MatchStart, MatchEnd);
+  SM.PrintMessage(
+      Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error,
+      CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
+          (ExpectedMatch ? "expected" : "excluded") +
+          " string found in input");
+  SM.PrintMessage(MatchStart, SourceMgr::DK_Note, "found here", {MatchRange});
+  Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
+}
+
+static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
+                       const CheckString &CheckStr, StringRef Buffer,
+                       StringMap<StringRef> &VariableTable, size_t MatchPos,
+                       size_t MatchLen) {
+  PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+             Buffer, VariableTable, MatchPos, MatchLen);
+}
+
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+                         StringRef Prefix, SMLoc Loc, const Pattern &Pat,
+                         StringRef Buffer,
+                         StringMap<StringRef> &VariableTable) {
+  if (!ExpectedMatch && !VerboseVerbose)
+    return;
+
   // Otherwise, we have an error, emit an error message.
-  SM.PrintMessage(Loc, SourceMgr::DK_Error,
-                  "expected string not found in input");
+  SM.PrintMessage(Loc,
+                  ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark,
+                  CheckTypeName(Prefix, Pat.getCheckTy()) + ": " +
+                      (ExpectedMatch ? "expected" : "excluded") +
+                      " string not found in input");
 
   // Print the "scanning from here" line.  If the current position is at the
   // end of a line, advance to the start of the next line.
@@ -971,13 +1097,16 @@ static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc, const Pattern &Pat,
                   "scanning from here");
 
   // Allow the pattern to print additional information if desired.
-  Pat.PrintFailureInfo(SM, Buffer, VariableTable);
+  Pat.PrintVariableUses(SM, Buffer, VariableTable);
+  if (ExpectedMatch)
+    Pat.PrintFuzzyMatch(SM, Buffer, VariableTable);
 }
 
-static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
-                             StringRef Buffer,
-                             StringMap<StringRef> &VariableTable) {
-  PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
+static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
+                         const CheckString &CheckStr, StringRef Buffer,
+                         StringMap<StringRef> &VariableTable) {
+  PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
+               Buffer, VariableTable);
 }
 
 /// Count the number of newlines in the specified range.
@@ -1025,9 +1154,10 @@ size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
   StringRef MatchBuffer = Buffer.substr(LastPos);
   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
   if (MatchPos == StringRef::npos) {
-    PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
+    PrintNoMatch(true, SM, *this, MatchBuffer, VariableTable);
     return StringRef::npos;
   }
+  PrintMatch(true, SM, *this, MatchBuffer, VariableTable, MatchPos, MatchLen);
 
   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
   // or CHECK-NOT
@@ -1055,22 +1185,27 @@ size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
 
 /// Verify there is a single line in the given buffer.
 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
-  if (Pat.getCheckTy() != Check::CheckNext)
+  if (Pat.getCheckTy() != Check::CheckNext &&
+      Pat.getCheckTy() != Check::CheckEmpty)
     return false;
 
+  Twine CheckName =
+      Prefix +
+      Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
+
   // Count the number of newlines between the previous match and this one.
   assert(Buffer.data() !=
              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
                                     SMLoc::getFromPointer(Buffer.data())))
                  ->getBufferStart() &&
-         "CHECK-NEXT can't be the first check in a file");
+         "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
 
   const char *FirstNewLine = nullptr;
   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
 
   if (NumNewLines == 0) {
     SM.PrintMessage(Loc, SourceMgr::DK_Error,
-                    Prefix + "-NEXT: is on the same line as previous match");
+                    CheckName + ": is on the same line as previous match");
     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
                     "'next' match was here");
     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
@@ -1080,8 +1215,8 @@ bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
 
   if (NumNewLines != 1) {
     SM.PrintMessage(Loc, SourceMgr::DK_Error,
-                    Prefix +
-                        "-NEXT: is not on the line after the previous match");
+                    CheckName +
+                        ": is not on the line after the previous match");
     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
                     "'next' match was here");
     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
@@ -1133,13 +1268,15 @@ bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
     size_t MatchLen = 0;
     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
 
-    if (Pos == StringRef::npos)
+    if (Pos == StringRef::npos) {
+      PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer,
+                   VariableTable);
       continue;
+    }
+
+    PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, Buffer, VariableTable,
+               Pos, MatchLen);
 
-    SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Pos),
-                    SourceMgr::DK_Error, Prefix + "-NOT: string occurred!");
-    SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
-                    Prefix + "-NOT: pattern specified here");
     return true;
   }
 
@@ -1153,10 +1290,23 @@ size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
   if (DagNotStrings.empty())
     return 0;
 
-  size_t LastPos = 0;
-  size_t StartPos = LastPos;
-
-  for (const Pattern &Pat : DagNotStrings) {
+  // The start of the search range.
+  size_t StartPos = 0;
+
+  struct MatchRange {
+    size_t Pos;
+    size_t End;
+  };
+  // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
+  // ranges are erased from this list once they are no longer in the search
+  // range.
+  std::list<MatchRange> MatchRanges;
+
+  // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
+  // group, so we don't use a range-based for loop here.
+  for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
+       PatItr != PatEnd; ++PatItr) {
+    const Pattern &Pat = *PatItr;
     assert((Pat.getCheckTy() == Check::CheckDAG ||
             Pat.getCheckTy() == Check::CheckNot) &&
            "Invalid CHECK-DAG or CHECK-NOT!");
@@ -1168,57 +1318,92 @@ size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
 
     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
 
-    size_t MatchLen = 0, MatchPos;
-
     // CHECK-DAG always matches from the start.
-    StringRef MatchBuffer = Buffer.substr(StartPos);
-    MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
-    // With a group of CHECK-DAGs, a single mismatching means the match on
-    // that group of CHECK-DAGs fails immediately.
-    if (MatchPos == StringRef::npos) {
-      PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
-      return StringRef::npos;
-    }
-    // Re-calc it as the offset relative to the start of the original string.
-    MatchPos += StartPos;
-
-    if (!NotStrings.empty()) {
-      if (MatchPos < LastPos) {
-        // Reordered?
-        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
-                        SourceMgr::DK_Error,
-                        Prefix + "-DAG: found a match of CHECK-DAG"
-                                 " reordering across a CHECK-NOT");
-        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
-                        SourceMgr::DK_Note,
-                        Prefix + "-DAG: the farthest match of CHECK-DAG"
-                                 " is found here");
-        SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
-                        Prefix + "-NOT: the crossed pattern specified"
-                                 " here");
-        SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
-                        Prefix + "-DAG: the reordered pattern specified"
-                                 " here");
+    size_t MatchLen = 0, MatchPos = StartPos;
+
+    // Search for a match that doesn't overlap a previous match in this
+    // CHECK-DAG group.
+    for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
+      StringRef MatchBuffer = Buffer.substr(MatchPos);
+      size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
+      // With a group of CHECK-DAGs, a single mismatch means the match on
+      // that group of CHECK-DAGs fails immediately.
+      if (MatchPosBuf == StringRef::npos) {
+        PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, MatchBuffer,
+                     VariableTable);
         return StringRef::npos;
       }
-      // All subsequent CHECK-DAGs should be matched from the farthest
-      // position of all precedent CHECK-DAGs (including this one.)
-      StartPos = LastPos;
-      // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
-      // CHECK-DAG, verify that there's no 'not' strings occurred in that
-      // region.
-      StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
-      if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
-        return StringRef::npos;
-      // Clear "not strings".
-      NotStrings.clear();
+      // Re-calc it as the offset relative to the start of the original string.
+      MatchPos += MatchPosBuf;
+      if (VerboseVerbose)
+        PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
+                   MatchPos, MatchLen);
+      MatchRange M{MatchPos, MatchPos + MatchLen};
+      if (AllowDeprecatedDagOverlap) {
+        // We don't need to track all matches in this mode, so we just maintain
+        // one match range that encompasses the current CHECK-DAG group's
+        // matches.
+        if (MatchRanges.empty())
+          MatchRanges.insert(MatchRanges.end(), M);
+        else {
+          auto Block = MatchRanges.begin();
+          Block->Pos = std::min(Block->Pos, M.Pos);
+          Block->End = std::max(Block->End, M.End);
+        }
+        break;
+      }
+      // Iterate previous matches until overlapping match or insertion point.
+      bool Overlap = false;
+      for (; MI != ME; ++MI) {
+        if (M.Pos < MI->End) {
+          // !Overlap => New match has no overlap and is before this old match.
+          // Overlap => New match overlaps this old match.
+          Overlap = MI->Pos < M.End;
+          break;
+        }
+      }
+      if (!Overlap) {
+        // Insert non-overlapping match into list.
+        MatchRanges.insert(MI, M);
+        break;
+      }
+      if (VerboseVerbose) {
+        SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
+        SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
+        SMRange OldRange(OldStart, OldEnd);
+        SM.PrintMessage(OldStart, SourceMgr::DK_Note,
+                        "match discarded, overlaps earlier DAG match here",
+                        {OldRange});
+      }
+      MatchPos = MI->End;
+    }
+    if (!VerboseVerbose)
+      PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, Buffer, VariableTable,
+                 MatchPos, MatchLen);
+
+    // Handle the end of a CHECK-DAG group.
+    if (std::next(PatItr) == PatEnd ||
+        std::next(PatItr)->getCheckTy() == Check::CheckNot) {
+      if (!NotStrings.empty()) {
+        // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
+        // CHECK-DAG, verify that none of the 'not' strings occurs in that
+        // region.
+        StringRef SkippedRegion =
+            Buffer.slice(StartPos, MatchRanges.begin()->Pos);
+        if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
+          return StringRef::npos;
+        // Clear "not strings".
+        NotStrings.clear();
+      }
+      // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
+      // end of this CHECK-DAG group's match range.
+      StartPos = MatchRanges.rbegin()->End;
+      // Don't waste time checking for (impossible) overlaps before that.
+      MatchRanges.clear();
     }
-
-    // Update the last position with CHECK-DAG matches.
-    LastPos = std::max(MatchPos + MatchLen, LastPos);
   }
 
-  return LastPos;
+  return StartPos;
 }
 
 // A check prefix must contain only alphanumeric, hyphens and underscores.
@@ -1357,8 +1542,7 @@ bool CheckInput(SourceMgr &SM, StringRef Buffer,
 }
 
 int main(int argc, char **argv) {
-  sys::PrintStackTraceOnErrorSignal(argv[0]);
-  PrettyStackTraceProgram X(argc, argv);
+  InitLLVM X(argc, argv);
   cl::ParseCommandLineOptions(argc, argv);
 
   if (!ValidateCheckPrefixes()) {
@@ -1379,6 +1563,9 @@ int main(int argc, char **argv) {
     return 2;
   }
 
+  if (VerboseVerbose)
+    Verbose = true;
+
   SourceMgr SM;
 
   // Read the expected strings from the check file.
@@ -1425,5 +1612,9 @@ int main(int argc, char **argv) {
                             InputFileText, InputFile.getBufferIdentifier()),
                         SMLoc());
 
-  return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
+  int ExitCode = CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
+  if (ExitCode == 1 && DumpInputOnFailure)
+    errs() << "Full input was:\n<<<<<<\n" << InputFileText << "\n>>>>>>\n";
+
+  return ExitCode;
 }
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index 19c880ae94d3..c9def83309f6 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -218,7 +218,7 @@ static std::error_code GetFileNameFromHandle(HANDLE FileHandle,
   }
 }
 
-/// @brief Find program using shell lookup rules.
+/// Find program using shell lookup rules.
 /// @param Program This is either an absolute path, relative path, or simple a
 ///        program name. Look in PATH for any programs that match. If no
 ///        extension is present, try all extensions in PATHEXT.
diff --git a/utils/LLVMVisualizers/llvm.natvis b/utils/LLVMVisualizers/llvm.natvis
index 439868a5afdb..963c94f7e116 100644
--- a/utils/LLVMVisualizers/llvm.natvis
+++ b/utils/LLVMVisualizers/llvm.natvis
@@ -10,36 +10,36 @@ For later versions of Visual Studio, no setup is required.
 <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
   <!-- VS2013 -->
   <Type Name="llvm::SmallVectorImpl&lt;*&gt;" Priority="MediumLow">
-    <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) == 0">empty</DisplayString>
-    <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) != 0">{{ size={($T1*)EndX - ($T1*)BeginX} }}</DisplayString>
+    <DisplayString Condition="Size == 0">empty</DisplayString>
+    <DisplayString Condition="Size != 0">{{ size={Size} }}</DisplayString>
     <Expand>
-      <Item Name="[size]">($T1*)EndX - ($T1*)BeginX</Item>
-      <Item Name="[capacity]">($T1*)CapacityX - ($T1*)BeginX</Item>
+      <Item Name="[size]">Size</Item>
+      <Item Name="[capacity]">Capacity</Item>
       <ArrayItems>
-        <Size>($T1*)EndX - ($T1*)BeginX</Size>
+        <Size>Size</Size>
         <ValuePointer>($T1*)BeginX</ValuePointer>
       </ArrayItems>
     </Expand>
   </Type>
   <!-- VS2015 and up -->
   <Type Name="llvm::SmallVectorImpl&lt;*&gt;">
-    <DisplayString IncludeView ="elt0" Condition="(($T1*)EndX - ($T1*)BeginX) == 0"></DisplayString>
+    <DisplayString IncludeView ="elt0" Condition="Size == 0"></DisplayString>
     <DisplayString IncludeView ="elt0">{(($T1*)BeginX)[0]}{*this,view(elt1)}</DisplayString>
-    <DisplayString IncludeView ="elt1" Condition="(($T1*)EndX - ($T1*)BeginX) == 1"></DisplayString>
+    <DisplayString IncludeView ="elt1" Condition="Size == 1"></DisplayString>
     <DisplayString IncludeView ="elt1">, {(($T1*)BeginX)[1]}{*this,view(elt2)}</DisplayString>
-    <DisplayString IncludeView ="elt2" Condition="(($T1*)EndX - ($T1*)BeginX) == 2"></DisplayString>
+    <DisplayString IncludeView ="elt2" Condition="Size == 2"></DisplayString>
     <DisplayString IncludeView ="elt2">, {(($T1*)BeginX)[2]}{*this,view(elt3)}</DisplayString>
-    <DisplayString IncludeView ="elt3" Condition="(($T1*)EndX - ($T1*)BeginX) == 3"></DisplayString>
+    <DisplayString IncludeView ="elt3" Condition="Size == 3"></DisplayString>
     <DisplayString IncludeView ="elt3">, {(($T1*)BeginX)[2]}{*this,view(elt4)}</DisplayString>
-    <DisplayString IncludeView ="elt4" Condition="(($T1*)EndX - ($T1*)BeginX) == 4"></DisplayString>
-    <DisplayString IncludeView ="elt4">, /* {(($T1*)EndX - ($T1*)BeginX) - 4} more*/ </DisplayString>
-    <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) == 0">empty</DisplayString>
-    <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) != 0">{{{*this,view(elt0)}}}</DisplayString>
+    <DisplayString IncludeView ="elt4" Condition="Size == 4"></DisplayString>
+    <DisplayString IncludeView ="elt4">, /* {Size - 4} more*/ </DisplayString>
+    <DisplayString Condition="Size == 0">empty</DisplayString>
+    <DisplayString Condition="Size != 0">{{{*this,view(elt0)}}}</DisplayString>
     <Expand>
-      <Item Name="[size]">($T1*)EndX - ($T1*)BeginX</Item>
-      <Item Name="[capacity]">($T1*)CapacityX - ($T1*)BeginX</Item>
+      <Item Name="[size]">Size</Item>
+      <Item Name="[capacity]">Capacity</Item>
       <ArrayItems>
-        <Size>($T1*)EndX - ($T1*)BeginX</Size>
+        <Size>Size</Size>
         <ValuePointer>($T1*)BeginX</ValuePointer>
       </ArrayItems>
     </Expand>
@@ -56,13 +56,13 @@ For later versions of Visual Studio, no setup is required.
     </Expand>
   </Type>
   <Type Name="llvm::SmallString&lt;*&gt;">
-    <DisplayString>{(const char*)BeginX,[(char*)EndX - (char*)BeginX] na}</DisplayString>
-    <StringView>(const char*)BeginX,[(char*)EndX - (char*)BeginX]</StringView>
+    <DisplayString>{(const char*)BeginX,[Size] na}</DisplayString>
+    <StringView>(const char*)BeginX,[Size]</StringView>
     <Expand>
-      <Item Name="[size]">(char*)EndX - (char*)BeginX</Item>
-      <Item Name="[capacity]">(char*)CapacityX - (char*)BeginX</Item>
+      <Item Name="[size]">Size</Item>
+      <Item Name="[capacity]">Capacity</Item>
       <ArrayItems>
-        <Size>(char*)EndX - (char*)BeginX</Size>
+        <Size>Size</Size>
         <ValuePointer>(char*)BeginX</ValuePointer>
       </ArrayItems>
     </Expand>
@@ -195,10 +195,19 @@ For later versions of Visual Studio, no setup is required.
   </Type>
   
   <Type Name="llvm::Optional&lt;*&gt;">
-    <DisplayString Condition="!hasVal">empty</DisplayString>
-    <DisplayString Condition="hasVal">{*(($T1 *)(unsigned char *)storage.buffer)}</DisplayString>
+    <DisplayString Condition="!Storage.hasVal">None</DisplayString>
+    <DisplayString Condition="Storage.hasVal">{*(($T1 *)(unsigned char *)Storage.storage.buffer)}</DisplayString>
     <Expand>
-      <Item Name="[underlying]" Condition="hasVal">*(($T1 *)(unsigned char *)storage.buffer)</Item>
+      <Item Name="[underlying]" Condition="Storage.hasVal">*(($T1 *)(unsigned char *)Storage.storage.buffer)</Item>
+    </Expand>
+  </Type>
+
+  <Type Name="llvm::Expected&lt;*&gt;">
+    <DisplayString Condition="HasError">Error</DisplayString>
+    <DisplayString Condition="!HasError">{*((storage_type *)TStorage.buffer)}</DisplayString>
+    <Expand>
+      <Item Name="[value]" Condition="!HasError">*((storage_type *)TStorage.buffer)</Item>
+      <Item Name="[error]" Condition="HasError">*((error_type *)ErrorStorage.buffer)</Item>
     </Expand>
   </Type>
 
diff --git a/utils/Reviewing/find_interesting_reviews.py b/utils/Reviewing/find_interesting_reviews.py
new file mode 100644
index 000000000000..5af462b54a9f
--- /dev/null
+++ b/utils/Reviewing/find_interesting_reviews.py
@@ -0,0 +1,633 @@
+#!/usr/bin/env python
+
+import argparse
+import email.mime.multipart
+import email.mime.text
+import logging
+import os.path
+import pickle
+import re
+import smtplib
+import subprocess
+import sys
+from datetime import datetime, timedelta
+from phabricator import Phabricator
+
+# Setting up a virtualenv to run this script can be done by running the
+# following commands:
+# $ virtualenv venv
+# $ . ./venv/bin/activate
+# $ pip install Phabricator
+
+GIT_REPO_METADATA = (("llvm", "https://llvm.org/git/llvm.git"), )
+
+# The below PhabXXX classes represent objects as modelled by Phabricator.
+# The classes can be serialized to disk, to try and make sure that we don't
+# needlessly have to re-fetch lots of data from Phabricator, as that would
+# make this script unusably slow.
+
+
+class PhabObject:
+    OBJECT_KIND = None
+
+    def __init__(self, id):
+        self.id = id
+
+
+class PhabObjectCache:
+    def __init__(self, PhabObjectClass):
+        self.PhabObjectClass = PhabObjectClass
+        self.most_recent_info = None
+        self.oldest_info = None
+        self.id2PhabObjects = {}
+
+    def get_name(self):
+        return self.PhabObjectClass.OBJECT_KIND + "sCache"
+
+    def get(self, id):
+        if id not in self.id2PhabObjects:
+            self.id2PhabObjects[id] = self.PhabObjectClass(id)
+        return self.id2PhabObjects[id]
+
+    def get_ids_in_cache(self):
+        return self.id2PhabObjects.keys()
+
+    def get_objects(self):
+        return self.id2PhabObjects.values()
+
+    DEFAULT_DIRECTORY = "PhabObjectCache"
+
+    def _get_pickle_name(self, directory):
+        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
+        return os.path.join(directory, file_name)
+
+    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
+        """Restore this cache's attributes from its pickle file in directory.
+        A missing or corrupt cache file is reported and otherwise ignored.
+        FIXME: consider whether JSON would be more interoperable than pickle.
+        """
+        try:
+            f = open(self._get_pickle_name(directory), "rb")
+        except IOError as err:
+            print("Could not find cache. Error message: {0}. Continuing..."
+                  .format(err))
+        else:
+            with f:
+                try:
+                    d = pickle.load(f)
+                    self.__dict__.update(d)
+                except Exception as err:  # corrupt pickles raise many types
+                    print("Cache seems to be corrupt. " +
+                          "Not using cache. Error message: {0}".format(err))
+
+    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        with open(self._get_pickle_name(directory), "wb") as f:
+            pickle.dump(self.__dict__, f)
+        print("wrote cache to disk, most_recent_info= {0}".format(
+            datetime.fromtimestamp(self.most_recent_info)
+            if self.most_recent_info is not None else None))
+
+
+class PhabReview(PhabObject):
+    OBJECT_KIND = "Review"
+
+    def __init__(self, id):
+        PhabObject.__init__(self, id)
+
+    def update(self, title, dateCreated, dateModified, author):
+        self.title = title
+        self.dateCreated = dateCreated
+        self.dateModified = dateModified
+        self.author = author
+
+    def setPhabDiffs(self, phabDiffs):
+        self.phabDiffs = phabDiffs
+
+
+class PhabUser(PhabObject):
+    OBJECT_KIND = "User"
+
+    def __init__(self, id):
+        PhabObject.__init__(self, id)
+
+    def update(self, phid, realName):
+        self.phid = phid
+        self.realName = realName
+
+
+class PhabHunk:
+    def __init__(self, rest_api_hunk):
+        self.oldOffset = int(rest_api_hunk["oldOffset"])
+        self.oldLength = int(rest_api_hunk["oldLength"])
+        # self.actual_lines_changed_offset will contain the offsets of the
+        # lines that were changed in this hunk.
+        self.actual_lines_changed_offset = []
+        offset = self.oldOffset
+        inHunk = False
+        hunkStart = -1
+        contextLines = 3
+        for line in rest_api_hunk["corpus"].split("\n"):
+            if line.startswith("+"):
+                # line is a new line that got introduced in this patch.
+                # Do not record it as a changed line.
+                if inHunk is False:
+                    inHunk = True
+                    hunkStart = max(self.oldOffset, offset - contextLines)
+                continue
+            if line.startswith("-"):
+                # line was changed or removed from the older version of the
+                # code. Record it as a changed line.
+                if inHunk is False:
+                    inHunk = True
+                    hunkStart = max(self.oldOffset, offset - contextLines)
+                offset += 1
+                continue
+            # line is a context line.
+            if inHunk is True:
+                inHunk = False
+                hunkEnd = offset + contextLines
+                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
+            offset += 1
+        if inHunk is True:
+            hunkEnd = offset + contextLines
+            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
+
+        # The above algorithm could result in adjacent or overlapping ranges
+        # being recorded into self.actual_lines_changed_offset.
+        # Merge the adjacent and overlapping ranges in there:
+        t = []
+        lastRange = None
+        for start, end in self.actual_lines_changed_offset + \
+                [(sys.maxsize, sys.maxsize)]:
+            if lastRange is None:
+                lastRange = (start, end)
+            else:
+                if lastRange[1] >= start:
+                    lastRange = (lastRange[0], end)
+                else:
+                    t.append(lastRange)
+                    lastRange = (start, end)
+        self.actual_lines_changed_offset = t
+
+
+class PhabChange:
+    def __init__(self, rest_api_change):
+        self.oldPath = rest_api_change["oldPath"]
+        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]
+
+
+class PhabDiff(PhabObject):
+    OBJECT_KIND = "Diff"
+
+    def __init__(self, id):
+        PhabObject.__init__(self, id)
+
+    def update(self, rest_api_results):
+        self.revisionID = rest_api_results["revisionID"]
+        self.dateModified = int(rest_api_results["dateModified"])
+        self.dateCreated = int(rest_api_results["dateCreated"])
+        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]
+
+
+class ReviewsCache(PhabObjectCache):
+    def __init__(self):
+        PhabObjectCache.__init__(self, PhabReview)
+
+
+class UsersCache(PhabObjectCache):
+    def __init__(self):
+        PhabObjectCache.__init__(self, PhabUser)
+
+
+reviews_cache = ReviewsCache()
+users_cache = UsersCache()
+
+
+def init_phab_connection():
+    phab = Phabricator()
+    phab.update_interfaces()
+    return phab
+
+
+def update_cached_info(phab, cache, phab_query, order, record_results,
+                       max_nr_entries_per_fetch, max_nr_days_to_cache):
+    q = phab
+    LIMIT = max_nr_entries_per_fetch
+    for query_step in phab_query:
+        q = getattr(q, query_step)
+    results = q(order=order, limit=LIMIT)
+    most_recent_info, oldest_info = record_results(cache, results, phab)
+    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - \
+        timedelta(days=max_nr_days_to_cache)
+    most_recent_info_overall = most_recent_info
+    cache.write_cache_to_disk()
+    after = results["cursor"]["after"]
+    print("after: {0!r}".format(after))
+    print("most_recent_info: {0}".format(
+        datetime.fromtimestamp(most_recent_info)))
+    while (after is not None
+           and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch):
+        need_more_older_data = \
+            (cache.oldest_info is None or
+             datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch)
+        print(("need_more_older_data={0} cache.oldest_info={1} " +
+               "oldest_info_to_fetch={2}").format(
+                   need_more_older_data,
+                   datetime.fromtimestamp(cache.oldest_info)
+                   if cache.oldest_info is not None else None,
+                   oldest_info_to_fetch))
+        need_more_newer_data = \
+            (cache.most_recent_info is None or
+             cache.most_recent_info < most_recent_info)
+        print(("need_more_newer_data={0} cache.most_recent_info={1} " +
+               "most_recent_info={2}")
+              .format(need_more_newer_data, cache.most_recent_info,
+                      most_recent_info))
+        if not need_more_older_data and not need_more_newer_data:
+            break
+        results = q(order=order, after=after, limit=LIMIT)
+        most_recent_info, oldest_info = record_results(cache, results, phab)
+        after = results["cursor"]["after"]
+        print("after: {0!r}".format(after))
+        print("most_recent_info: {0}".format(
+            datetime.fromtimestamp(most_recent_info)))
+        cache.write_cache_to_disk()
+    cache.most_recent_info = most_recent_info_overall
+    if after is None:
+        # We did fetch all records. Mark the cache to contain all info since
+        # the start of time.
+        oldest_info = 0
+    cache.oldest_info = oldest_info
+    cache.write_cache_to_disk()
+
+
+def record_reviews(cache, reviews, phab):
+    most_recent_info = None
+    oldest_info = None
+    for reviewInfo in reviews["data"]:
+        if reviewInfo["type"] != "DREV":
+            continue
+        id = reviewInfo["id"]
+        # phid = reviewInfo["phid"]
+        dateModified = int(reviewInfo["fields"]["dateModified"])
+        dateCreated = int(reviewInfo["fields"]["dateCreated"])
+        title = reviewInfo["fields"]["title"]
+        author = reviewInfo["fields"]["authorPHID"]
+        phabReview = cache.get(id)
+        if "dateModified" not in phabReview.__dict__ or \
+           dateModified > phabReview.dateModified:
+            diff_results = phab.differential.querydiffs(revisionIDs=[id])
+            diff_ids = sorted(diff_results.keys())
+            phabDiffs = []
+            for diff_id in diff_ids:
+                diffInfo = diff_results[diff_id]
+                d = PhabDiff(diff_id)
+                d.update(diffInfo)
+                phabDiffs.append(d)
+            phabReview.update(title, dateCreated, dateModified, author)
+            phabReview.setPhabDiffs(phabDiffs)
+            print("Updated D{0} modified on {1} ({2} diffs)".format(
+                id, datetime.fromtimestamp(dateModified), len(phabDiffs)))
+
+        if most_recent_info is None:
+            most_recent_info = dateModified
+        elif most_recent_info < dateModified:
+            most_recent_info = dateModified
+
+        if oldest_info is None:
+            oldest_info = dateModified
+        elif oldest_info > dateModified:
+            oldest_info = dateModified
+    return most_recent_info, oldest_info
+
+
+def record_users(cache, users, phab):
+    most_recent_info = None
+    oldest_info = None
+    for info in users["data"]:
+        if info["type"] != "USER":
+            continue
+        id = info["id"]
+        phid = info["phid"]
+        dateModified = int(info["fields"]["dateModified"])
+        # dateCreated = int(info["fields"]["dateCreated"])
+        realName = info["fields"]["realName"]
+        phabUser = cache.get(id)
+        phabUser.update(phid, realName)
+        if most_recent_info is None:
+            most_recent_info = dateModified
+        elif most_recent_info < dateModified:
+            most_recent_info = dateModified
+        if oldest_info is None:
+            oldest_info = dateModified
+        elif oldest_info > dateModified:
+            oldest_info = dateModified
+    return most_recent_info, oldest_info
+
+
+PHABCACHESINFO = ((reviews_cache, ("differential", "revision", "search"),
+                   "updated", record_reviews, 5, 7),
+                  (users_cache, ("user", "search"), "newest", record_users,
+                   100, 1000))
+
+
+def load_cache():
+    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
+        cache.populate_cache_from_disk()
+        print("Loaded {0} nr entries: {1}".format(
+            cache.get_name(), len(cache.get_ids_in_cache())))
+        print("Loaded {0} has most recent info: {1}".format(
+            cache.get_name(),
+            datetime.fromtimestamp(cache.most_recent_info)
+            if cache.most_recent_info is not None else None))
+
+
+def update_cache(phab):
+    load_cache()
+    for cache, phab_query, order, record_results, max_nr_entries_per_fetch, \
+            max_nr_days_to_cache in PHABCACHESINFO:
+        update_cached_info(phab, cache, phab_query, order, record_results,
+                           max_nr_entries_per_fetch, max_nr_days_to_cache)
+        ids_in_cache = cache.get_ids_in_cache()
+        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
+        cache.write_cache_to_disk()
+
+
+def get_most_recent_reviews(days):
+    """Return cached reviews within `days` of the newest one, newest first."""
+    by_recency = sorted(
+        reviews_cache.get_objects(), key=lambda r: -r.dateModified)
+    if not by_recency:
+        return by_recency
+    newest_time = datetime.fromtimestamp(by_recency[0].dateModified)
+    cut_off_date = newest_time - timedelta(days=days)
+    # The list is sorted, so the reviews to keep form a prefix of it.
+    nr_kept = 0
+    while nr_kept < len(by_recency) and datetime.fromtimestamp(
+            by_recency[nr_kept].dateModified) >= cut_off_date:
+        nr_kept += 1
+    return by_recency[:nr_kept]
+
+
+# All of the above code is about fetching data from Phabricator and caching it
+# on local disk. The below code contains the actual "business logic" for this
+# script.
+
+_userphid2realname = None
+
+
+def get_real_name_from_author(user_phid):
+    global _userphid2realname
+    if _userphid2realname is None:
+        _userphid2realname = {}
+        for user in users_cache.get_objects():
+            _userphid2realname[user.phid] = user.realName
+    return _userphid2realname.get(user_phid, "unknown")
+
+
+def print_most_recent_reviews(phab, days, filter_reviewers):
+    msgs = []
+
+    def add_msg(msg):
+        msgs.append(msg)
+        print(msg)
+
+    newest_reviews = get_most_recent_reviews(days)
+    add_msg(u"These are the reviews that look interesting to be reviewed. " +
+            u"The report below has 2 sections. The first " +
+            u"section is organized per review; the second section is organized "
+            + u"per potential reviewer.\n")
+    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
+    oldest_datetime = \
+        datetime.fromtimestamp(oldest_review.dateModified) \
+        if oldest_review else None
+    add_msg((u"The report below is based on analyzing the reviews that got " +
+             u"touched in the past {0} days (since {1}). " +
+             u"The script found {2} such reviews.\n").format(
+                 days, oldest_datetime, len(newest_reviews)))
+    reviewer2reviews_and_scores = {}
+    for i, review in enumerate(newest_reviews):
+        matched_reviewers = find_reviewers_for_review(review)
+        matched_reviewers = filter_reviewers(matched_reviewers)
+        if len(matched_reviewers) == 0:
+            continue
+        add_msg((u"{0:>3}. https://reviews.llvm.org/D{1} by {2}\n     {3}\n" +
+                 u"     Last updated on {4}").format(
+                     i, review.id,
+                     get_real_name_from_author(review.author), review.title,
+                     datetime.fromtimestamp(review.dateModified)))
+        for reviewer, scores in matched_reviewers:
+            add_msg(u"    potential reviewer {0}, score {1}".format(
+                reviewer,
+                "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")"))
+            if reviewer not in reviewer2reviews_and_scores:
+                reviewer2reviews_and_scores[reviewer] = []
+            reviewer2reviews_and_scores[reviewer].append((review, scores))
+
+    # Print out a summary per reviewer.
+    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
+        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
+        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
+        add_msg(u"\n\nSUMMARY FOR {0} (found {1} reviews):".format(
+            reviewer, len(reviews_and_scores)))
+        for review, scores in reviews_and_scores:
+            add_msg(u"[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
+                "/".join(["{0:.1f}%".format(s) for s in scores]), review.id,
+                review.title, get_real_name_from_author(review.author)))
+    return "\n".join(msgs)
+
+
+def get_git_cmd_output(cmd):
+    output = None
+    try:
+        logging.debug(cmd)
+        output = subprocess.check_output(
+            cmd, shell=True, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as e:
+        logging.debug(str(e))
+    if output is None:
+        return None
+    return output.decode("utf-8", errors='ignore')
+
+
+reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")
+
+
+def parse_blame_output_line_porcelain(blame_output):
+    """Count the "author-mail" lines per email address in blame_output.
+    blame_output is `git blame --line-porcelain` text, or None when the
+    git command failed, in which case the result is an empty dict.
+    """
+    counts = {}
+    for line in (blame_output or "").split('\n'):
+        found = reAuthorMail.match(line)
+        if found is None:
+            continue
+        address = found.group(1)
+        counts[address] = counts.get(address, 0) + 1
+    return counts
+
+
+def find_reviewers_for_diff_heuristic(diff):
+    """Score potential reviewers for diff as [(email, (lines%, files%))].
+    Heuristic 1: good reviewers touched the same lines as this patch.
+    Heuristic 2: good reviewers touched the same files as this patch.
+    Returns [] when git cannot determine a base revision for the diff."""
+    try:
+        from shlex import quote
+    except ImportError:  # Python 2
+        from pipes import quote
+    reviewers2nr_lines_touched = {}
+    reviewers2nr_files_touched = {}
+    # Assume the last revision before the diff was modified is its base.
+    git_repo = "git_repos/llvm"
+    cmd = 'git -C {0} rev-list -n 1 --before="{1}" master'.format(
+        git_repo,
+        datetime.fromtimestamp(
+            diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"))
+    base_revision = (get_git_cmd_output(cmd) or "").strip()
+    logging.debug("Base revision={0}".format(base_revision))
+    if not base_revision:
+        return []
+    for change in diff.changes:
+        # Quote: the path comes from Phabricator and is passed to a shell.
+        path = quote("{0}".format(change.oldPath))
+        for hunk in change.hunks:
+            for start_line, end_line in hunk.actual_lines_changed_offset:
+                # Heuristic 1: blame only the lines around each change.
+                cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e " +
+                       "-w --line-porcelain -L {1},{2} {3} -- {4}").format(
+                           git_repo, start_line, end_line, base_revision, path)
+                blame_output = get_git_cmd_output(cmd)
+                for reviewer, nr_occurences in \
+                        parse_blame_output_line_porcelain(blame_output).items():
+                    reviewers2nr_lines_touched[reviewer] = nr_occurences + \
+                        reviewers2nr_lines_touched.get(reviewer, 0)
+        # Heuristic 2: blame the whole file; count files, not lines.
+        cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
+               "--line-porcelain {1} -- {2}").format(git_repo, base_revision,
+                                                     path)
+        blame_output = get_git_cmd_output(cmd)
+        for reviewer in parse_blame_output_line_porcelain(blame_output):
+            reviewers2nr_files_touched[reviewer] = \
+                reviewers2nr_files_touched.get(reviewer, 0) + 1
+
+    # Compute "match scores"
+    total_nr_lines = sum(reviewers2nr_lines_touched.values())
+    total_nr_files = len(diff.changes)
+    reviewers_matchscores = \
+        [(reviewer,
+          (reviewers2nr_lines_touched.get(reviewer, 0)*100.0/total_nr_lines
+           if total_nr_lines != 0 else 0,
+           reviewers2nr_files_touched[reviewer]*100.0/total_nr_files
+           if total_nr_files != 0 else 0))
+         for reviewer in reviewers2nr_files_touched]
+    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
+    return reviewers_matchscores
+
+
+def find_reviewers_for_review(review):
+    """Return [(reviewer, scores)] for review's newest diff; [] if no diffs."""
+    # Process the newest diff first.
+    diffs = sorted(
+        review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
+    if len(diffs) == 0:
+        return []
+    matched_reviewers = find_reviewers_for_diff_heuristic(diffs[0])
+    # Show progress, as this is a slow operation:
+    sys.stdout.write('.')
+    sys.stdout.flush()
+    logging.debug(u"matched_reviewers: {0}".format(matched_reviewers))
+    return matched_reviewers
+
+
+def update_git_repos():
+    git_repos_directory = "git_repos"
+    for name, url in GIT_REPO_METADATA:
+        dirname = os.path.join(git_repos_directory, name)
+        if not os.path.exists(dirname):
+            cmd = "git clone {0} {1}".format(url, dirname)
+            output = get_git_cmd_output(cmd)
+        cmd = "git -C {0} pull --rebase".format(dirname)
+        output = get_git_cmd_output(cmd)
+
+
+def send_emails(email_addresses, sender, msg):
+    s = smtplib.SMTP()
+    s.connect()
+    for email_address in email_addresses:
+        email_msg = email.mime.multipart.MIMEMultipart()
+        email_msg['From'] = sender
+        email_msg['To'] = email_address
+        email_msg['Subject'] = 'LLVM patches you may be able to review.'
+        email_msg.attach(email.mime.text.MIMEText(msg.encode('utf-8'), 'plain'))
+        # python 3.x: s.send_message(email_msg)
+        s.sendmail(email_msg['From'], email_msg['To'], email_msg.as_string())
+    s.quit()
+
+
+def filter_reviewers_to_report_for(people_to_look_for):
+    """Return an example filter keeping only pairs for the listed people."""
+    def keep_listed(potential_reviewers):
+        return [r for r in potential_reviewers if r[0] in people_to_look_for]
+    return keep_listed
+
+
+def main():
+    """Report reviews that the given people may be able to review.
+    Unless --no-update-cache is given, the Phabricator cache is refreshed
+    first; the report is emailed when --email-report lists recipients."""
+    parser = argparse.ArgumentParser(
+        description='Match open reviews to potential reviewers.')
+    parser.add_argument(
+        '--no-update-cache',
+        dest='update_cache',
+        action='store_false',
+        default=True,
+        help='Do not update cached Phabricator objects')
+    # List options default to [] so that omitting them means "no entries".
+    parser.add_argument(
+        '--email-report', dest='email_report', nargs='*', default=[],
+        help="The email addresses to send the report to.")
+    parser.add_argument(
+        '--sender', dest='sender', default="",
+        help="The email address to use in 'From' on messages emailed out.")
+    parser.add_argument(
+        '--email-addresses', dest='email_addresses', nargs='*', default=[],
+        help="The email addresses (as known by LLVM git) of " +
+        "the people to look for reviews for.")
+    # A 'count' option is None when absent; default to 0 so it compares.
+    parser.add_argument('--verbose', '-v', action='count', default=0)
+
+    args = parser.parse_args()
+
+    if args.verbose >= 1:
+        logging.basicConfig(level=logging.DEBUG)
+
+    # Python 2 passes byte strings on the command line; Python 3 passes text.
+    people_to_look_for = [e.decode('utf-8') if isinstance(e, bytes) else e
+                          for e in args.email_addresses]
+    logging.debug("Will look for reviews that following contributors could " +
+                  "review: {}".format(people_to_look_for))
+    logging.debug("Will email a report to: {}".format(args.email_report))
+
+    phab = init_phab_connection()
+
+    if args.update_cache:
+        update_cache(phab)
+
+    load_cache()
+    update_git_repos()
+    msg = print_most_recent_reviews(
+        phab,
+        days=1,
+        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for))
+
+    if args.email_report:
+        send_emails(args.email_report, args.sender, msg)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index f2d304bfcf5b..e808661b7a51 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -105,6 +105,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -272,9 +273,17 @@ public:
       return true;
 
     // ... or if any of its super classes are a subset of RHS.
-    for (const ClassInfo *CI : SuperClasses)
-      if (CI->isSubsetOf(RHS))
+    SmallVector<const ClassInfo *, 16> Worklist(SuperClasses.begin(),
+                                                SuperClasses.end());
+    SmallPtrSet<const ClassInfo *, 16> Visited;
+    while (!Worklist.empty()) {
+      auto *CI = Worklist.pop_back_val();
+      if (CI == &RHS)
         return true;
+      for (auto *Super : CI->SuperClasses)
+        if (Visited.insert(Super).second)
+          Worklist.push_back(Super);
+    }
 
     return false;
   }
@@ -378,6 +387,9 @@ struct MatchableInfo {
     /// The operand name this is, if anything.
     StringRef SrcOpName;
 
+    /// The operand name this is, before renaming for tied operands.
+    StringRef OrigSrcOpName;
+
     /// The suboperand index within SrcOpName, or -1 for the entire operand.
     int SubOpIdx;
 
@@ -416,14 +428,22 @@ struct MatchableInfo {
       RegOperand
     } Kind;
 
+    /// Tuple containing the index of the (earlier) result operand that should
+    /// be copied from, as well as the indices of the corresponding (parsed)
+    /// operands in the asm string.
+    struct TiedOperandsTuple {
+      unsigned ResOpnd;
+      unsigned SrcOpnd1Idx;
+      unsigned SrcOpnd2Idx;
+    };
+
     union {
       /// This is the operand # in the AsmOperands list that this should be
       /// copied from.
       unsigned AsmOperandNum;
 
-      /// TiedOperandNum - This is the (earlier) result operand that should be
-      /// copied from.
-      unsigned TiedOperandNum;
+      /// Description of tied operands.
+      TiedOperandsTuple TiedOperands;
 
       /// ImmVal - This is the immediate value added to the instruction.
       int64_t ImmVal;
@@ -444,10 +464,11 @@ struct MatchableInfo {
       return X;
     }
 
-    static ResOperand getTiedOp(unsigned TiedOperandNum) {
+    static ResOperand getTiedOp(unsigned TiedOperandNum, unsigned SrcOperand1,
+                                unsigned SrcOperand2) {
       ResOperand X;
       X.Kind = TiedOperand;
-      X.TiedOperandNum = TiedOperandNum;
+      X.TiedOperands = { TiedOperandNum, SrcOperand1, SrcOperand2 };
       X.MINumOperands = 1;
       return X;
     }
@@ -560,7 +581,7 @@ struct MatchableInfo {
 
   /// validate - Return true if this matchable is a valid thing to match against
   /// and perform a bunch of validity checking.
-  bool validate(StringRef CommentDelimiter, bool Hack) const;
+  bool validate(StringRef CommentDelimiter, bool IsAlias) const;
 
   /// findAsmOperand - Find the AsmOperand with the specified name and
   /// suboperand index.
@@ -573,14 +594,21 @@ struct MatchableInfo {
 
   /// findAsmOperandNamed - Find the first AsmOperand with the specified name.
   /// This does not check the suboperand index.
-  int findAsmOperandNamed(StringRef N) const {
-    auto I = find_if(AsmOperands,
+  int findAsmOperandNamed(StringRef N, int LastIdx = -1) const {
+    auto I = std::find_if(AsmOperands.begin() + LastIdx + 1, AsmOperands.end(),
                      [&](const AsmOperand &Op) { return Op.SrcOpName == N; });
     return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
   }
 
+  int findAsmOperandOriginallyNamed(StringRef N) const {
+    auto I =
+        find_if(AsmOperands,
+                [&](const AsmOperand &Op) { return Op.OrigSrcOpName == N; });
+    return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
+  }
+
   void buildInstructionResultOperands();
-  void buildAliasResultOperands();
+  void buildAliasResultOperands(bool AliasConstraintsAreChecked);
 
   /// operator< - Compare two matchables.
   bool operator<(const MatchableInfo &RHS) const {
@@ -620,6 +648,10 @@ struct MatchableInfo {
     if (Mnemonic != RHS.Mnemonic)
       return false;
 
+    // Different variants can't conflict.
+    if (AsmVariantID != RHS.AsmVariantID)
+      return false;
+
     // The number of operands is unambiguous.
     if (AsmOperands.size() != RHS.AsmOperands.size())
       return false;
@@ -770,6 +802,8 @@ public:
 LLVM_DUMP_METHOD void MatchableInfo::dump() const {
   errs() << TheDef->getName() << " -- " << "flattened:\"" << AsmString <<"\"\n";
 
+  errs() << "  variant: " << AsmVariantID << "\n";
+
   for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
     const AsmOperand &Op = AsmOperands[i];
     errs() << "  op[" << i << "] = " << Op.Class->ClassName << " - ";
@@ -840,10 +874,6 @@ void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {
       if (Op.AsmOperandNum > (unsigned)SrcAsmOperand)
         --Op.AsmOperandNum;
       break;
-    case ResOperand::TiedOperand:
-      if (Op.TiedOperandNum > (unsigned)SrcAsmOperand)
-        --Op.TiedOperandNum;
-      break;
     }
   }
 }
@@ -1019,7 +1049,7 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
     addAsmOperand(String.substr(Prev), IsIsolatedToken);
 }
 
-bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
+bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
   // Reject matchables with no .s string.
   if (AsmString.empty())
     PrintFatalError(TheDef->getLoc(), "instruction with empty asm string");
@@ -1052,17 +1082,10 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const {
       PrintFatalError(TheDef->getLoc(),
                       "matchable with operand modifier '" + Tok +
                       "' not supported by asm matcher.  Mark isCodeGenOnly!");
-
     // Verify that any operand is only mentioned once.
     // We reject aliases and ignore instructions for now.
-    if (Tok[0] == '$' && !OperandNames.insert(Tok).second) {
-      if (!Hack)
-        PrintFatalError(TheDef->getLoc(),
-                        "ERROR: matchable with tied operand '" + Tok +
-                        "' can never be matched!");
-      // FIXME: Should reject these.  The ARM backend hits this with $lane in a
-      // bunch of instructions.  It is unclear what the right answer is.
-      DEBUG({
+    if (!IsAlias && Tok[0] == '$' && !OperandNames.insert(Tok).second) {
+      LLVM_DEBUG({
         errs() << "warning: '" << TheDef->getName() << "': "
                << "ignoring instruction with tied operand '"
                << Tok << "'\n";
@@ -1448,11 +1471,13 @@ void AsmMatcherInfo::buildInfo() {
                            SubtargetFeaturePairs.end());
 #ifndef NDEBUG
   for (const auto &Pair : SubtargetFeatures)
-    DEBUG(Pair.second.dump());
+    LLVM_DEBUG(Pair.second.dump());
 #endif // NDEBUG
   assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
 
   bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
+  bool ReportMultipleNearMisses =
+      AsmParser->getValueAsBit("ReportMultipleNearMisses");
 
   // Parse the instructions; we need to do this first so that we can gather the
   // singleton register classes.
@@ -1495,7 +1520,7 @@ void AsmMatcherInfo::buildInfo() {
 
       // Ignore instructions which shouldn't be matched and diagnose invalid
       // instruction definitions with an error.
-      if (!II->validate(CommentDelimiter, true))
+      if (!II->validate(CommentDelimiter, false))
         continue;
 
       Matchables.push_back(std::move(II));
@@ -1507,7 +1532,6 @@ void AsmMatcherInfo::buildInfo() {
       Records.getAllDerivedDefinitions("InstAlias");
     for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
       auto Alias = llvm::make_unique<CodeGenInstAlias>(AllInstAliases[i],
-                                                       Variant.AsmVariantNo,
                                                        Target);
 
       // If the tblgen -match-prefix option is specified (for tblgen hackers),
@@ -1526,7 +1550,7 @@ void AsmMatcherInfo::buildInfo() {
       II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
 
       // Validate the alias definitions.
-      II->validate(CommentDelimiter, false);
+      II->validate(CommentDelimiter, true);
 
       Matchables.push_back(std::move(II));
     }
@@ -1599,7 +1623,12 @@ void AsmMatcherInfo::buildInfo() {
         NewMatchables.push_back(std::move(AliasII));
       }
     } else
-      II->buildAliasResultOperands();
+      // FIXME: The tied operands checking is not yet integrated with the
+      // framework for reporting multiple near misses. To prevent invalid
+      // formats from being matched with an alias if a tied-operands check
+      // would otherwise have disallowed it, we just disallow such constructs
+      // in TableGen completely.
+      II->buildAliasResultOperands(!ReportMultipleNearMisses);
   }
   if (!NewMatchables.empty())
     Matchables.insert(Matchables.end(),
@@ -1672,6 +1701,7 @@ buildInstructionOperandReference(MatchableInfo *II,
 
   // Set up the operand class.
   Op->Class = getOperandClass(Operands[Idx], Op->SubOpIdx);
+  Op->OrigSrcOpName = OperandName;
 
   // If the named operand is tied, canonicalize it to the untied operand.
   // For example, something like:
@@ -1716,6 +1746,7 @@ void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II,
       Op.Class = getOperandClass(CGA.ResultOperands[i].getRecord(),
                                  Op.SubOpIdx);
       Op.SrcOpName = OperandName;
+      Op.OrigSrcOpName = OperandName;
       return;
     }
 
@@ -1734,11 +1765,16 @@ void MatchableInfo::buildInstructionResultOperands() {
     if (OpInfo.MINumOperands == 1)
       TiedOp = OpInfo.getTiedRegister();
     if (TiedOp != -1) {
-      ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
+      int TiedSrcOperand = findAsmOperandOriginallyNamed(OpInfo.Name);
+      if (TiedSrcOperand != -1 &&
+          ResOperands[TiedOp].Kind == ResOperand::RenderAsmOperand)
+        ResOperands.push_back(ResOperand::getTiedOp(
+            TiedOp, ResOperands[TiedOp].AsmOperandNum, TiedSrcOperand));
+      else
+        ResOperands.push_back(ResOperand::getTiedOp(TiedOp, 0, 0));
       continue;
     }
 
-    // Find out what operand from the asmparser this MCInst operand comes from.
     int SrcOperand = findAsmOperandNamed(OpInfo.Name);
     if (OpInfo.Name.empty() || SrcOperand == -1) {
       // This may happen for operands that are tied to a suboperand of a
@@ -1767,10 +1803,16 @@ void MatchableInfo::buildInstructionResultOperands() {
   }
 }
 
-void MatchableInfo::buildAliasResultOperands() {
+void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
   const CodeGenInstAlias &CGA = *DefRec.get<const CodeGenInstAlias*>();
   const CodeGenInstruction *ResultInst = getResultInst();
 
+  // Map of:  $reg -> #lastref
+  //   where $reg is the name of the operand in the asm string
+  //   where #lastref is the last processed index where $reg was referenced in
+  //   the asm string.
+  SmallDenseMap<StringRef, int> OperandRefs;
+
   // Loop over all operands of the result instruction, determining how to
   // populate them.
   unsigned AliasOpNo = 0;
@@ -1783,8 +1825,46 @@ void MatchableInfo::buildAliasResultOperands() {
     if (OpInfo->MINumOperands == 1)
       TiedOp = OpInfo->getTiedRegister();
     if (TiedOp != -1) {
-      ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
-      continue;
+      unsigned SrcOp1 = 0;
+      unsigned SrcOp2 = 0;
+
+      // If an operand has been specified twice in the asm string,
+      // add the two source operands' indices to the TiedOp so that
+      // at runtime the 'tied' constraint is checked.
+      if (ResOperands[TiedOp].Kind == ResOperand::RenderAsmOperand) {
+        SrcOp1 = ResOperands[TiedOp].AsmOperandNum;
+
+        // Find the next operand (similarly named operand) in the string.
+        StringRef Name = AsmOperands[SrcOp1].SrcOpName;
+        auto Insert = OperandRefs.try_emplace(Name, SrcOp1);
+        SrcOp2 = findAsmOperandNamed(Name, Insert.first->second);
+
+        // Not updating the record in OperandRefs will cause TableGen
+        // to fail with an error at the end of this function.
+        if (AliasConstraintsAreChecked)
+          Insert.first->second = SrcOp2;
+
+        // In case it only has one reference in the asm string,
+        // it doesn't need to be checked for tied constraints.
+        SrcOp2 = (SrcOp2 == (unsigned)-1) ? SrcOp1 : SrcOp2;
+      }
+
+      // If the alias operand is of a different operand class, we only want
+      // to benefit from the tied-operands check and just match the operand
+      // as a normal operand, but not copy the original (TiedOp) to the result
+      // instruction. We do this by passing -1 as the tied operand to copy.
+      if (ResultInst->Operands[i].Rec->getName() !=
+          ResultInst->Operands[TiedOp].Rec->getName()) {
+        SrcOp1 = ResOperands[TiedOp].AsmOperandNum;
+        int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second;
+        StringRef Name = CGA.ResultOperands[AliasOpNo].getName();
+        SrcOp2 = findAsmOperand(Name, SubIdx);
+        ResOperands.push_back(
+            ResOperand::getTiedOp((unsigned)-1, SrcOp1, SrcOp2));
+      } else {
+        ResOperands.push_back(ResOperand::getTiedOp(TiedOp, SrcOp1, SrcOp2));
+        continue;
+      }
     }
 
     // Handle all the suboperands for this operand.
@@ -1803,6 +1883,11 @@ void MatchableInfo::buildAliasResultOperands() {
           PrintFatalError(TheDef->getLoc(), "Instruction '" +
                         TheDef->getName() + "' has operand '" + OpName +
                         "' that doesn't appear in asm string!");
+
+        // Add it to the operand references. If it is added a second time, the
+        // record won't be updated and it will fail later on.
+        OperandRefs.try_emplace(Name, SrcOperand);
+
         unsigned NumOperands = (SubIdx == -1 ? OpInfo->MINumOperands : 1);
         ResOperands.push_back(ResOperand::getRenderedOp(SrcOperand,
                                                         NumOperands));
@@ -1821,6 +1906,13 @@ void MatchableInfo::buildAliasResultOperands() {
       }
     }
   }
+
+  // Check that operands are not repeated more times than is supported.
+  for (auto &T : OperandRefs) {
+    if (T.second != -1 && findAsmOperandNamed(T.first, T.second) != -1)
+      PrintFatalError(TheDef->getLoc(),
+                      "Operand '" + T.first + "' can never be matched");
+  }
 }
 
 static unsigned
@@ -1897,9 +1989,15 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
   CvtOS << "      static_cast<" << TargetOperandClass
         << "&>(*Operands[OpIdx]).addRegOperands(Inst, 1);\n";
   CvtOS << "      break;\n";
-  CvtOS << "    case CVT_Tied:\n";
-  CvtOS << "      Inst.addOperand(Inst.getOperand(OpIdx));\n";
+  CvtOS << "    case CVT_Tied: {\n";
+  CvtOS << "      assert(OpIdx < (size_t)(std::end(TiedAsmOperandTable) -\n";
+  CvtOS << "                          std::begin(TiedAsmOperandTable)) &&\n";
+  CvtOS << "             \"Tied operand not found\");\n";
+  CvtOS << "      unsigned TiedResOpnd = TiedAsmOperandTable[OpIdx][0];\n";
+  CvtOS << "      if (TiedResOpnd != (uint8_t) -1)\n";
+  CvtOS << "        Inst.addOperand(Inst.getOperand(TiedResOpnd));\n";
   CvtOS << "      break;\n";
+  CvtOS << "    }\n";
 
   std::string OperandFnBody;
   raw_string_ostream OpOS(OperandFnBody);
@@ -1930,6 +2028,10 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
   OperandConversionKinds.insert(CachedHashString("CVT_Tied"));
   enum { CVT_Done, CVT_Reg, CVT_Tied };
 
+  // Map of e.g. <0, 2, 3> -> "Tie_0_2_3" enum label.
+  std::map<std::tuple<uint8_t, uint8_t, uint8_t>, std::string>
+  TiedOperandsEnumMap;
+
   for (auto &II : Infos) {
     // Check if we have a custom match function.
     StringRef AsmMatchConverter =
@@ -2050,11 +2152,24 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
         // If this operand is tied to a previous one, just copy the MCInst
         // operand from the earlier one.We can only tie single MCOperand values.
         assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
-        unsigned TiedOp = OpInfo.TiedOperandNum;
-        assert(i > TiedOp && "Tied operand precedes its target!");
-        Signature += "__Tie" + utostr(TiedOp);
+        uint8_t TiedOp = OpInfo.TiedOperands.ResOpnd;
+        uint8_t SrcOp1 =
+            OpInfo.TiedOperands.SrcOpnd1Idx + HasMnemonicFirst;
+        uint8_t SrcOp2 =
+            OpInfo.TiedOperands.SrcOpnd2Idx + HasMnemonicFirst;
+        assert((i > TiedOp || TiedOp == (uint8_t)-1) &&
+               "Tied operand precedes its target!");
+        auto TiedTupleName = std::string("Tie") + utostr(TiedOp) + '_' +
+                             utostr(SrcOp1) + '_' + utostr(SrcOp2);
+        Signature += "__" + TiedTupleName;
         ConversionRow.push_back(CVT_Tied);
         ConversionRow.push_back(TiedOp);
+        ConversionRow.push_back(SrcOp1);
+        ConversionRow.push_back(SrcOp2);
+
+        // Also create an 'enum' for this combination of tied operands.
+        auto Key = std::make_tuple(TiedOp, SrcOp1, SrcOp2);
+        TiedOperandsEnumMap.emplace(Key, TiedTupleName);
         break;
       }
       case MatchableInfo::ResOperand::ImmOperand: {
@@ -2139,6 +2254,33 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
   // Finish up the operand number lookup function.
   OpOS << "    }\n  }\n}\n\n";
 
+  // Output a static table for tied operands.
+  if (TiedOperandsEnumMap.size()) {
+    // The number of tied operand combinations will be small in practice,
+    // but just add the assert to be sure.
+    assert(TiedOperandsEnumMap.size() <= 254 &&
+           "Too many tied-operand combinations to reference with "
+           "an 8bit offset from the conversion table, where index "
+           "'255' is reserved as operand not to be copied.");
+
+    OS << "enum {\n";
+    for (auto &KV : TiedOperandsEnumMap) {
+      OS << "  " << KV.second << ",\n";
+    }
+    OS << "};\n\n";
+
+    OS << "static const uint8_t TiedAsmOperandTable[][3] = {\n";
+    for (auto &KV : TiedOperandsEnumMap) {
+      OS << "  /* " << KV.second << " */ { "
+         << utostr(std::get<0>(KV.first)) << ", "
+         << utostr(std::get<1>(KV.first)) << ", "
+         << utostr(std::get<2>(KV.first)) << " },\n";
+    }
+    OS << "};\n\n";
+  } else
+    OS << "static const uint8_t TiedAsmOperandTable[][3] = "
+          "{ /* empty  */ {0, 0, 0} };\n\n";
+
   OS << "namespace {\n";
 
   // Output the operand conversion kind enum.
@@ -2165,9 +2307,26 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
     assert(ConversionTable[Row].size() % 2 == 0 && "bad conversion row!");
     OS << "  // " << InstructionConversionKinds[Row] << "\n";
     OS << "  { ";
-    for (unsigned i = 0, e = ConversionTable[Row].size(); i != e; i += 2)
-      OS << OperandConversionKinds[ConversionTable[Row][i]] << ", "
-         << (unsigned)(ConversionTable[Row][i + 1]) << ", ";
+    for (unsigned i = 0, e = ConversionTable[Row].size(); i != e; i += 2) {
+      OS << OperandConversionKinds[ConversionTable[Row][i]] << ", ";
+      if (OperandConversionKinds[ConversionTable[Row][i]] !=
+          CachedHashString("CVT_Tied")) {
+        OS << (unsigned)(ConversionTable[Row][i + 1]) << ", ";
+        continue;
+      }
+
+      // For a tied operand, emit a reference to the TiedAsmOperandTable
+      // that contains the operand to copy, and the parsed operands to
+      // check for their tied constraints.
+      auto Key = std::make_tuple((uint8_t)ConversionTable[Row][i + 1],
+                                 (uint8_t)ConversionTable[Row][i + 2],
+                                 (uint8_t)ConversionTable[Row][i + 3]);
+      auto TiedOpndEnum = TiedOperandsEnumMap.find(Key);
+      assert(TiedOpndEnum != TiedOperandsEnumMap.end() &&
+             "No record for tied operand pair");
+      OS << TiedOpndEnum->second << ", ";
+      i += 2;
+    }
     OS << "CVT_Done },\n";
   }
 
@@ -2307,14 +2466,20 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info,
       continue;
 
     OS << "  // '" << CI.ClassName << "' class\n";
-    OS << "  case " << CI.Name << ":\n";
-    OS << "    if (Operand." << CI.PredicateMethod << "())\n";
+    OS << "  case " << CI.Name << ": {\n";
+    OS << "    DiagnosticPredicate DP(Operand." << CI.PredicateMethod
+       << "());\n";
+    OS << "    if (DP.isMatch())\n";
     OS << "      return MCTargetAsmParser::Match_Success;\n";
-    if (!CI.DiagnosticType.empty())
-      OS << "    return " << Info.Target.getName() << "AsmParser::Match_"
+    if (!CI.DiagnosticType.empty()) {
+      OS << "    if (DP.isNearMatch())\n";
+      OS << "      return " << Info.Target.getName() << "AsmParser::Match_"
          << CI.DiagnosticType << ";\n";
+      OS << "    break;\n";
+    }
     else
       OS << "    break;\n";
+    OS << "    }\n";
   }
   OS << "  } // end switch (Kind)\n\n";
 
@@ -2825,6 +2990,48 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
   OS << "}\n\n";
 }
 
+static void emitAsmTiedOperandConstraints(CodeGenTarget &Target,
+                                          AsmMatcherInfo &Info,
+                                          raw_ostream &OS) {
+  std::string AsmParserName =
+      Info.AsmParser->getValueAsString("AsmParserClassName");
+  OS << "static bool ";
+  OS << "checkAsmTiedOperandConstraints(const " << Target.getName()
+     << AsmParserName << "&AsmParser,\n";
+  OS << "                               unsigned Kind,\n";
+  OS << "                               const OperandVector &Operands,\n";
+  OS << "                               uint64_t &ErrorInfo) {\n";
+  OS << "  assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n";
+  OS << "  const uint8_t *Converter = ConversionTable[Kind];\n";
+  OS << "  for (const uint8_t *p = Converter; *p; p+= 2) {\n";
+  OS << "    switch (*p) {\n";
+  OS << "    case CVT_Tied: {\n";
+  OS << "      unsigned OpIdx = *(p+1);\n";
+  OS << "      assert(OpIdx < (size_t)(std::end(TiedAsmOperandTable) -\n";
+  OS << "                              std::begin(TiedAsmOperandTable)) &&\n";
+  OS << "             \"Tied operand not found\");\n";
+  OS << "      unsigned OpndNum1 = TiedAsmOperandTable[OpIdx][1];\n";
+  OS << "      unsigned OpndNum2 = TiedAsmOperandTable[OpIdx][2];\n";
+  OS << "      if (OpndNum1 != OpndNum2) {\n";
+  OS << "        auto &SrcOp1 = Operands[OpndNum1];\n";
+  OS << "        auto &SrcOp2 = Operands[OpndNum2];\n";
+  OS << "        if (SrcOp1->isReg() && SrcOp2->isReg()) {\n";
+  OS << "          if (!AsmParser.regsEqual(*SrcOp1, *SrcOp2)) {\n";
+  OS << "            ErrorInfo = OpndNum2;\n";
+  OS << "            return false;\n";
+  OS << "          }\n";
+  OS << "        }\n";
+  OS << "      }\n";
+  OS << "      break;\n";
+  OS << "    }\n";
+  OS << "    default:\n";
+  OS << "      break;\n";
+  OS << "    }\n";
+  OS << "  }\n";
+  OS << "  return true;\n";
+  OS << "}\n\n";
+}
+
 static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
                                      unsigned VariantCount) {
   OS << "static std::string " << Target.getName()
@@ -3072,6 +3279,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
       Info.Target.getName(), ClassName, "ComputeAvailableFeatures",
       Info.SubtargetFeatures, OS);
 
+  if (!ReportMultipleNearMisses)
+    emitAsmTiedOperandConstraints(Target, Info, OS);
+
   StringToOffsetTable StringTable;
 
   size_t MaxNumOperands = 0;
@@ -3495,6 +3705,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
   OS << "    if (matchingInlineAsm) {\n";
   OS << "      convertToMapAndConstraints(it->ConvertFn, Operands);\n";
+  if (!ReportMultipleNearMisses) {
+    OS << "      if (!checkAsmTiedOperandConstraints(*this, it->ConvertFn, "
+          "Operands, ErrorInfo))\n";
+    OS << "        return Match_InvalidTiedOperand;\n";
+    OS << "\n";
+  }
   OS << "      return Match_Success;\n";
   OS << "    }\n\n";
   OS << "    // We have selected a definite instruction, convert the parsed\n"
@@ -3569,6 +3785,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
     OS << "    }\n";
   }
 
+  if (!ReportMultipleNearMisses) {
+    OS << "    if (!checkAsmTiedOperandConstraints(*this, it->ConvertFn, "
+          "Operands, ErrorInfo))\n";
+    OS << "      return Match_InvalidTiedOperand;\n";
+    OS << "\n";
+  }
+
   OS << "    DEBUG_WITH_TYPE(\n";
   OS << "        \"asm-matcher\",\n";
   OS << "        dbgs() << \"Opcode result: complete match, selecting this opcode\\n\");\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 723c0cd773f7..3c4c9c8e5c6e 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -351,8 +351,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
 
     // If we don't have enough bits for this operand, don't include it.
     if (NumBits > BitsLeft) {
-      DEBUG(errs() << "Not enough bits to densely encode " << NumBits
-                   << " more bits\n");
+      LLVM_DEBUG(errs() << "Not enough bits to densely encode " << NumBits
+                        << " more bits\n");
       break;
     }
 
@@ -727,10 +727,6 @@ public:
 } // end anonymous namespace
 
 static unsigned CountNumOperands(StringRef AsmString, unsigned Variant) {
-  std::string FlatAsmString =
-      CodeGenInstruction::FlattenAsmStringVariants(AsmString, Variant);
-  AsmString = FlatAsmString;
-
   return AsmString.count(' ') + AsmString.count('\t');
 }
 
@@ -782,7 +778,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
     const DagInit *DI = R->getValueAsDag("ResultInst");
     const DefInit *Op = cast<DefInit>(DI->getOperator());
     AliasMap[getQualifiedName(Op->getDef())].insert(
-        std::make_pair(CodeGenInstAlias(R, Variant, Target), Priority));
+        std::make_pair(CodeGenInstAlias(R, Target), Priority));
   }
 
   // A map of which conditions need to be met for each instruction operand
@@ -799,14 +795,20 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
     for (auto &Alias : Aliases.second) {
       const CodeGenInstAlias &CGA = Alias.first;
       unsigned LastOpNo = CGA.ResultInstOperandIndex.size();
-      unsigned NumResultOps =
-          CountNumOperands(CGA.ResultInst->AsmString, Variant);
+      std::string FlatInstAsmString =
+         CodeGenInstruction::FlattenAsmStringVariants(CGA.ResultInst->AsmString,
+                                                      Variant);
+      unsigned NumResultOps = CountNumOperands(FlatInstAsmString, Variant);
+
+      std::string FlatAliasAsmString =
+        CodeGenInstruction::FlattenAsmStringVariants(CGA.AsmString,
+                                                      Variant);
 
       // Don't emit the alias if it has more operands than what it's aliasing.
-      if (NumResultOps < CountNumOperands(CGA.AsmString, Variant))
+      if (NumResultOps < CountNumOperands(FlatAliasAsmString, Variant))
         continue;
 
-      IAPrinter IAP(CGA.Result->getAsString(), CGA.AsmString);
+      IAPrinter IAP(CGA.Result->getAsString(), FlatAliasAsmString);
 
       StringRef Namespace = Target.getName();
       std::vector<Record *> ReqFeatures;
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 0944d54a4273..0428249f9179 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -29,7 +29,9 @@ add_tablegen(llvm-tblgen LLVM
   InstrDocsEmitter.cpp
   IntrinsicEmitter.cpp
   OptParserEmitter.cpp
+  PredicateExpander.cpp
   PseudoLoweringEmitter.cpp
+  RISCVCompressInstEmitter.cpp
   RegisterBankEmitter.cpp
   RegisterInfoEmitter.cpp
   SDNodeProperties.cpp
@@ -43,6 +45,7 @@ add_tablegen(llvm-tblgen LLVM
   X86FoldTablesEmitter.cpp
   X86ModRMFilters.cpp
   X86RecognizableInstr.cpp
+  WebAssemblyDisassemblerEmitter.cpp
   CTagsEmitter.cpp
   )
 set_target_properties(llvm-tblgen PROPERTIES FOLDER "Tablegenning")
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
index 5213cd904462..a0f83f1c9910 100644
--- a/utils/TableGen/CTagsEmitter.cpp
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -73,7 +73,7 @@ void CTagsEmitter::run(raw_ostream &OS) {
   for (const auto &D : Defs)
     Tags.push_back(Tag(D.first, locate(D.second.get())));
   // Emit tags.
-  std::sort(Tags.begin(), Tags.end());
+  llvm::sort(Tags.begin(), Tags.end());
   OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
   OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
   for (const Tag &T : Tags)
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 64cf23314497..1abe3a88bfbf 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -808,7 +808,7 @@ TypeSetByHwMode TypeInfer::getLegalTypes() {
 
 #ifndef NDEBUG
 TypeInfer::ValidateOnExit::~ValidateOnExit() {
-  if (!VTS.validate()) {
+  if (Infer.Validate && !VTS.validate()) {
     dbgs() << "Type set is empty for each HW mode:\n"
               "possible type contradiction in the pattern below "
               "(use -print-records with llvm-tblgen to see all "
@@ -1134,6 +1134,14 @@ Record *TreePredicateFn::getScalarMemoryVT() const {
     return nullptr;
   return R->getValueAsDef("ScalarMemoryVT");
 }
+bool TreePredicateFn::hasGISelPredicateCode() const {
+  return !PatFragRec->getRecord()
+              ->getValueAsString("GISelPredicateCode")
+              .empty();
+}
+std::string TreePredicateFn::getGISelPredicateCode() const {
+  return PatFragRec->getRecord()->getValueAsString("GISelPredicateCode");
+}
 
 StringRef TreePredicateFn::getImmType() const {
   if (immCodeUsesAPInt())
@@ -1305,7 +1313,7 @@ std::string PatternToMatch::getPredicateCheck() const {
   SmallVector<const Predicate*,4> PredList;
   for (const Predicate &P : Predicates)
     PredList.push_back(&P);
-  std::sort(PredList.begin(), PredList.end(), deref<llvm::less>());
+  llvm::sort(PredList.begin(), PredList.end(), deref<llvm::less>());
 
   std::string Check;
   for (unsigned i = 0, e = PredList.size(); i != e; ++i) {
@@ -1564,7 +1572,7 @@ bool TreePatternNode::hasProperTypeByHwMode() const {
   for (const TypeSetByHwMode &S : Types)
     if (!S.isDefaultOnly())
       return true;
-  for (TreePatternNode *C : Children)
+  for (const TreePatternNodePtr &C : Children)
     if (C->hasProperTypeByHwMode())
       return true;
   return false;
@@ -1574,7 +1582,7 @@ bool TreePatternNode::hasPossibleType() const {
   for (const TypeSetByHwMode &S : Types)
     if (!S.isPossible())
       return false;
-  for (TreePatternNode *C : Children)
+  for (const TreePatternNodePtr &C : Children)
     if (!C->hasPossibleType())
       return false;
   return true;
@@ -1587,7 +1595,7 @@ bool TreePatternNode::setDefaultMode(unsigned Mode) {
     if (S.get(DefaultMode).empty())
       return false;
   }
-  for (TreePatternNode *C : Children)
+  for (const TreePatternNodePtr &C : Children)
     if (!C->setDefaultMode(Mode))
       return false;
   return true;
@@ -1644,13 +1652,6 @@ MVT::SimpleValueType SDNodeInfo::getKnownType(unsigned ResNo) const {
 // TreePatternNode implementation
 //
 
-TreePatternNode::~TreePatternNode() {
-#if 0 // FIXME: implement refcounted tree nodes!
-  for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
-    delete getChild(i);
-#endif
-}
-
 static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
   if (Operator->getName() == "set" ||
       Operator->getName() == "implicit")
@@ -1662,21 +1663,31 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
   if (Operator->isSubClassOf("SDNode"))
     return CDP.getSDNodeInfo(Operator).getNumResults();
 
-  if (Operator->isSubClassOf("PatFrag")) {
+  if (Operator->isSubClassOf("PatFrags")) {
     // If we've already parsed this pattern fragment, get it.  Otherwise, handle
     // the forward reference case where one pattern fragment references another
     // before it is processed.
-    if (TreePattern *PFRec = CDP.getPatternFragmentIfRead(Operator))
-      return PFRec->getOnlyTree()->getNumTypes();
+    if (TreePattern *PFRec = CDP.getPatternFragmentIfRead(Operator)) {
+      // The number of results of a fragment with alternative records is the
+      // maximum number of results across all alternatives.
+      unsigned NumResults = 0;
+      for (auto T : PFRec->getTrees())
+        NumResults = std::max(NumResults, T->getNumTypes());
+      return NumResults;
+    }
 
-    // Get the result tree.
-    DagInit *Tree = Operator->getValueAsDag("Fragment");
-    Record *Op = nullptr;
-    if (Tree)
-      if (DefInit *DI = dyn_cast<DefInit>(Tree->getOperator()))
-        Op = DI->getDef();
-    assert(Op && "Invalid Fragment");
-    return GetNumNodeResults(Op, CDP);
+    ListInit *LI = Operator->getValueAsListInit("Fragments");
+    assert(LI && "Invalid Fragment");
+    unsigned NumResults = 0;
+    for (Init *I : LI->getValues()) {
+      Record *Op = nullptr;
+      if (DagInit *Dag = dyn_cast<DagInit>(I))
+        if (DefInit *DI = dyn_cast<DefInit>(Dag->getOperator()))
+          Op = DI->getDef();
+      assert(Op && "Invalid Fragment");
+      NumResults = std::max(NumResults, GetNumNodeResults(Op, CDP));
+    }
+    return NumResults;
   }
 
   if (Operator->isSubClassOf("Instruction")) {
@@ -1783,16 +1794,17 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
 
 /// clone - Make a copy of this tree and all of its children.
 ///
-TreePatternNode *TreePatternNode::clone() const {
-  TreePatternNode *New;
+TreePatternNodePtr TreePatternNode::clone() const {
+  TreePatternNodePtr New;
   if (isLeaf()) {
-    New = new TreePatternNode(getLeafValue(), getNumTypes());
+    New = std::make_shared<TreePatternNode>(getLeafValue(), getNumTypes());
   } else {
-    std::vector<TreePatternNode*> CChildren;
+    std::vector<TreePatternNodePtr> CChildren;
     CChildren.reserve(Children.size());
     for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
       CChildren.push_back(getChild(i)->clone());
-    New = new TreePatternNode(getOperator(), CChildren, getNumTypes());
+    New = std::make_shared<TreePatternNode>(getOperator(), std::move(CChildren),
+                                            getNumTypes());
   }
   New->setName(getName());
   New->Types = Types;
@@ -1813,8 +1825,8 @@ void TreePatternNode::RemoveAllTypes() {
 
 /// SubstituteFormalArguments - Replace the formal arguments in this tree
 /// with actual values specified by ArgMap.
-void TreePatternNode::
-SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
+void TreePatternNode::SubstituteFormalArguments(
+    std::map<std::string, TreePatternNodePtr> &ArgMap) {
   if (isLeaf()) return;
 
   for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
@@ -1826,12 +1838,12 @@ SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
       if (isa<UnsetInit>(Val) || (isa<DefInit>(Val) &&
           cast<DefInit>(Val)->getDef()->getName() == "node")) {
         // We found a use of a formal argument, replace it with its value.
-        TreePatternNode *NewChild = ArgMap[Child->getName()];
+        TreePatternNodePtr NewChild = ArgMap[Child->getName()];
         assert(NewChild && "Couldn't find formal argument!");
         assert((Child->getPredicateFns().empty() ||
                 NewChild->getPredicateFns() == Child->getPredicateFns()) &&
                "Non-empty child predicate clobbered!");
-        setChild(i, NewChild);
+        setChild(i, std::move(NewChild));
       }
     } else {
       getChild(i)->SubstituteFormalArguments(ArgMap);
@@ -1841,29 +1853,81 @@ SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) {
 
 
 /// InlinePatternFragments - If this pattern refers to any pattern
-/// fragments, inline them into place, giving us a pattern without any
-/// PatFrag references.
-TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
+/// fragments, return the set of inlined versions (this can be more than
+/// one if a PatFrags record has multiple alternatives).
+void TreePatternNode::InlinePatternFragments(
+  TreePatternNodePtr T, TreePattern &TP,
+  std::vector<TreePatternNodePtr> &OutAlternatives) {
+
   if (TP.hasError())
-    return nullptr;
+    return;
+
+  if (isLeaf()) {
+    OutAlternatives.push_back(T);  // nothing to do.
+    return;
+  }
 
-  if (isLeaf())
-     return this;  // nothing to do.
   Record *Op = getOperator();
 
-  if (!Op->isSubClassOf("PatFrag")) {
-    // Just recursively inline children nodes.
-    for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
-      TreePatternNode *Child = getChild(i);
-      TreePatternNode *NewChild = Child->InlinePatternFragments(TP);
+  if (!Op->isSubClassOf("PatFrags")) {
+    if (getNumChildren() == 0) {
+      OutAlternatives.push_back(T);
+      return;
+    }
 
-      assert((Child->getPredicateFns().empty() ||
-              NewChild->getPredicateFns() == Child->getPredicateFns()) &&
-             "Non-empty child predicate clobbered!");
+    // Recursively inline children nodes.
+    std::vector<std::vector<TreePatternNodePtr> > ChildAlternatives;
+    ChildAlternatives.resize(getNumChildren());
+    for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
+      TreePatternNodePtr Child = getChildShared(i);
+      Child->InlinePatternFragments(Child, TP, ChildAlternatives[i]);
+      // If there are no alternatives for any child, there are no
+      // alternatives for this expression as whole.
+      if (ChildAlternatives[i].empty())
+        return;
 
-      setChild(i, NewChild);
+      for (auto NewChild : ChildAlternatives[i])
+        assert((Child->getPredicateFns().empty() ||
+                NewChild->getPredicateFns() == Child->getPredicateFns()) &&
+               "Non-empty child predicate clobbered!");
     }
-    return this;
+
+    // The end result is an all-pairs construction of the resultant pattern.
+    std::vector<unsigned> Idxs;
+    Idxs.resize(ChildAlternatives.size());
+    bool NotDone;
+    do {
+      // Create the variant and add it to the output list.
+      std::vector<TreePatternNodePtr> NewChildren;
+      for (unsigned i = 0, e = ChildAlternatives.size(); i != e; ++i)
+        NewChildren.push_back(ChildAlternatives[i][Idxs[i]]);
+      TreePatternNodePtr R = std::make_shared<TreePatternNode>(
+          getOperator(), std::move(NewChildren), getNumTypes());
+
+      // Copy over properties.
+      R->setName(getName());
+      R->setPredicateFns(getPredicateFns());
+      R->setTransformFn(getTransformFn());
+      for (unsigned i = 0, e = getNumTypes(); i != e; ++i)
+        R->setType(i, getExtType(i));
+
+      // Register alternative.
+      OutAlternatives.push_back(R);
+
+      // Increment indices to the next permutation by incrementing the
+      // indices from last index backward, e.g., generate the sequence
+      // [0, 0], [0, 1], [1, 0], [1, 1].
+      int IdxsIdx;
+      for (IdxsIdx = Idxs.size() - 1; IdxsIdx >= 0; --IdxsIdx) {
+        if (++Idxs[IdxsIdx] == ChildAlternatives[IdxsIdx].size())
+          Idxs[IdxsIdx] = 0;
+        else
+          break;
+      }
+      NotDone = (IdxsIdx >= 0);
+    } while (NotDone);
+
+    return;
   }
 
   // Otherwise, we found a reference to a fragment.  First, look up its
@@ -1874,39 +1938,42 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
   if (Frag->getNumArgs() != Children.size()) {
     TP.error("'" + Op->getName() + "' fragment requires " +
              Twine(Frag->getNumArgs()) + " operands!");
-    return nullptr;
+    return;
   }
 
-  TreePatternNode *FragTree = Frag->getOnlyTree()->clone();
-
-  TreePredicateFn PredFn(Frag);
-  if (!PredFn.isAlwaysTrue())
-    FragTree->addPredicateFn(PredFn);
+  // Compute the map of formal to actual arguments.
+  std::map<std::string, TreePatternNodePtr> ArgMap;
+  for (unsigned i = 0, e = Frag->getNumArgs(); i != e; ++i) {
+    const TreePatternNodePtr &Child = getChildShared(i);
+    ArgMap[Frag->getArgName(i)] = Child;
+  }
 
-  // Resolve formal arguments to their actual value.
-  if (Frag->getNumArgs()) {
-    // Compute the map of formal to actual arguments.
-    std::map<std::string, TreePatternNode*> ArgMap;
-    for (unsigned i = 0, e = Frag->getNumArgs(); i != e; ++i)
-      ArgMap[Frag->getArgName(i)] = getChild(i)->InlinePatternFragments(TP);
+  // Loop over all fragment alternatives.
+  for (auto Alternative : Frag->getTrees()) {
+    TreePatternNodePtr FragTree = Alternative->clone();
 
-    FragTree->SubstituteFormalArguments(ArgMap);
-  }
+    TreePredicateFn PredFn(Frag);
+    if (!PredFn.isAlwaysTrue())
+      FragTree->addPredicateFn(PredFn);
 
-  FragTree->setName(getName());
-  for (unsigned i = 0, e = Types.size(); i != e; ++i)
-    FragTree->UpdateNodeType(i, getExtType(i), TP);
+    // Resolve formal arguments to their actual value.
+    if (Frag->getNumArgs())
+      FragTree->SubstituteFormalArguments(ArgMap);
 
-  // Transfer in the old predicates.
-  for (const TreePredicateFn &Pred : getPredicateFns())
-    FragTree->addPredicateFn(Pred);
+    // Transfer types.  Note that the resolved alternative may have fewer
+    // (but not more) results than the PatFrags node.
+    FragTree->setName(getName());
+    for (unsigned i = 0, e = FragTree->getNumTypes(); i != e; ++i)
+      FragTree->UpdateNodeType(i, getExtType(i), TP);
 
-  // Get a new copy of this fragment to stitch into here.
-  //delete this;    // FIXME: implement refcounting!
+    // Transfer in the old predicates.
+    for (const TreePredicateFn &Pred : getPredicateFns())
+      FragTree->addPredicateFn(Pred);
 
-  // The fragment we inlined could have recursive inlining that is needed.  See
-  // if there are any pattern fragments in it and inline them as needed.
-  return FragTree->InlinePatternFragments(TP);
+    // The fragment we inlined could have recursive inlining that is needed.  See
+    // if there are any pattern fragments in it and inline them as needed.
+    FragTree->InlinePatternFragments(FragTree, TP, OutAlternatives);
+  }
 }
 
 /// getImplicitType - Check to see if the specified record has an implicit
@@ -1953,7 +2020,7 @@ static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo,
     return TypeSetByHwMode(T.getRegisterClass(R).getValueTypes());
   }
 
-  if (R->isSubClassOf("PatFrag")) {
+  if (R->isSubClassOf("PatFrags")) {
     assert(ResNo == 0 && "FIXME: PatFrag with multiple results?");
     // Pattern fragment types will be resolved when they are inlined.
     return TypeSetByHwMode(); // Unknown.
@@ -2205,35 +2272,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     return false;
   }
 
-  // special handling for set, which isn't really an SDNode.
-  if (getOperator()->getName() == "set") {
-    assert(getNumTypes() == 0 && "Set doesn't produce a value");
-    assert(getNumChildren() >= 2 && "Missing RHS of a set?");
-    unsigned NC = getNumChildren();
-
-    TreePatternNode *SetVal = getChild(NC-1);
-    bool MadeChange = SetVal->ApplyTypeConstraints(TP, NotRegisters);
-
-    for (unsigned i = 0; i < NC-1; ++i) {
-      TreePatternNode *Child = getChild(i);
-      MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
-
-      // Types of operands must match.
-      MadeChange |= Child->UpdateNodeType(0, SetVal->getExtType(i), TP);
-      MadeChange |= SetVal->UpdateNodeType(i, Child->getExtType(0), TP);
-    }
-    return MadeChange;
-  }
-
-  if (getOperator()->getName() == "implicit") {
-    assert(getNumTypes() == 0 && "Node doesn't produce a value");
-
-    bool MadeChange = false;
-    for (unsigned i = 0; i < getNumChildren(); ++i)
-      MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
-    return MadeChange;
-  }
-
   if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) {
     bool MadeChange = false;
 
@@ -2508,10 +2546,10 @@ TreePattern::TreePattern(Record *TheRec, DagInit *Pat, bool isInput,
   Trees.push_back(ParseTreePattern(Pat, ""));
 }
 
-TreePattern::TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput,
-                         CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp),
-                         isInputPattern(isInput), HasError(false),
-                         Infer(*this) {
+TreePattern::TreePattern(Record *TheRec, TreePatternNodePtr Pat, bool isInput,
+                         CodeGenDAGPatterns &cdp)
+    : TheRecord(TheRec), CDP(cdp), isInputPattern(isInput), HasError(false),
+      Infer(*this) {
   Trees.push_back(Pat);
 }
 
@@ -2524,8 +2562,8 @@ void TreePattern::error(const Twine &Msg) {
 }
 
 void TreePattern::ComputeNamedNodes() {
-  for (TreePatternNode *Tree : Trees)
-    ComputeNamedNodes(Tree);
+  for (TreePatternNodePtr &Tree : Trees)
+    ComputeNamedNodes(Tree.get());
 }
 
 void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
@@ -2536,22 +2574,22 @@ void TreePattern::ComputeNamedNodes(TreePatternNode *N) {
     ComputeNamedNodes(N->getChild(i));
 }
 
-
-TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
+TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
+                                                 StringRef OpName) {
   if (DefInit *DI = dyn_cast<DefInit>(TheInit)) {
     Record *R = DI->getDef();
 
     // Direct reference to a leaf DagNode or PatFrag?  Turn it into a
     // TreePatternNode of its own.  For example:
     ///   (foo GPR, imm) -> (foo GPR, (imm))
-    if (R->isSubClassOf("SDNode") || R->isSubClassOf("PatFrag"))
+    if (R->isSubClassOf("SDNode") || R->isSubClassOf("PatFrags"))
       return ParseTreePattern(
         DagInit::get(DI, nullptr,
                      std::vector<std::pair<Init*, StringInit*> >()),
         OpName);
 
     // Input argument?
-    TreePatternNode *Res = new TreePatternNode(DI, 1);
+    TreePatternNodePtr Res = std::make_shared<TreePatternNode>(DI, 1);
     if (R->getName() == "node" && !OpName.empty()) {
       if (OpName.empty())
         error("'node' argument requires a name to match with operand list");
@@ -2566,16 +2604,18 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
   if (isa<UnsetInit>(TheInit)) {
     if (OpName.empty())
       error("'?' argument requires a name to match with operand list");
-    TreePatternNode *Res = new TreePatternNode(TheInit, 1);
+    TreePatternNodePtr Res = std::make_shared<TreePatternNode>(TheInit, 1);
     Args.push_back(OpName);
     Res->setName(OpName);
     return Res;
   }
 
-  if (IntInit *II = dyn_cast<IntInit>(TheInit)) {
+  if (isa<IntInit>(TheInit) || isa<BitInit>(TheInit)) {
     if (!OpName.empty())
-      error("Constant int argument should not have a name!");
-    return new TreePatternNode(II, 1);
+      error("Constant int or bit argument should not have a name!");
+    if (isa<BitInit>(TheInit))
+      TheInit = TheInit->convertInitializerTo(IntRecTy::get());
+    return std::make_shared<TreePatternNode>(TheInit, 1);
   }
 
   if (BitsInit *BI = dyn_cast<BitsInit>(TheInit)) {
@@ -2601,8 +2641,8 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
     if (Dag->getNumArgs() != 1)
       error("Type cast only takes one operand!");
 
-    TreePatternNode *New = ParseTreePattern(Dag->getArg(0),
-                                            Dag->getArgNameStr(0));
+    TreePatternNodePtr New =
+        ParseTreePattern(Dag->getArg(0), Dag->getArgNameStr(0));
 
     // Apply the type cast.
     assert(New->getNumTypes() == 1 && "FIXME: Unhandled");
@@ -2615,7 +2655,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
   }
 
   // Verify that this is something that makes sense for an operator.
-  if (!Operator->isSubClassOf("PatFrag") &&
+  if (!Operator->isSubClassOf("PatFrags") &&
       !Operator->isSubClassOf("SDNode") &&
       !Operator->isSubClassOf("Instruction") &&
       !Operator->isSubClassOf("SDNodeXForm") &&
@@ -2650,13 +2690,17 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
       error("Cannot use '" + Operator->getName() + "' in an output pattern!");
   }
 
-  std::vector<TreePatternNode*> Children;
+  std::vector<TreePatternNodePtr> Children;
 
   // Parse all the operands.
   for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i)
     Children.push_back(ParseTreePattern(Dag->getArg(i), Dag->getArgNameStr(i)));
 
-  // If the operator is an intrinsic, then this is just syntactic sugar for for
+  // Get the actual number of results before Operator is converted to an intrinsic
+  // node (which is hard-coded to have either zero or one result).
+  unsigned NumResults = GetNumNodeResults(Operator, CDP);
+
+  // If the operator is an intrinsic, then this is just syntactic sugar for
   // (intrinsic_* <number>, ..children..).  Pick the right intrinsic node, and
   // convert the intrinsic name to a number.
   if (Operator->isSubClassOf("Intrinsic")) {
@@ -2673,13 +2717,13 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
     else // Otherwise, no chain.
       Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode();
 
-    TreePatternNode *IIDNode = new TreePatternNode(IntInit::get(IID), 1);
-    Children.insert(Children.begin(), IIDNode);
+    Children.insert(Children.begin(),
+                    std::make_shared<TreePatternNode>(IntInit::get(IID), 1));
   }
 
   if (Operator->isSubClassOf("ComplexPattern")) {
     for (unsigned i = 0; i < Children.size(); ++i) {
-      TreePatternNode *Child = Children[i];
+      TreePatternNodePtr Child = Children[i];
 
       if (Child->getName().empty())
         error("All arguments to a ComplexPattern must be named");
@@ -2698,8 +2742,9 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
     }
   }
 
-  unsigned NumResults = GetNumNodeResults(Operator, CDP);
-  TreePatternNode *Result = new TreePatternNode(Operator, Children, NumResults);
+  TreePatternNodePtr Result =
+      std::make_shared<TreePatternNode>(Operator, std::move(Children),
+                                        NumResults);
   Result->setName(OpName);
 
   if (Dag->getName()) {
@@ -2715,7 +2760,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
 /// more type generic things and have useless type casts fold away.
 ///
 /// This returns true if any change is made.
-static bool SimplifyTree(TreePatternNode *&N) {
+static bool SimplifyTree(TreePatternNodePtr &N) {
   if (N->isLeaf())
     return false;
 
@@ -2725,7 +2770,7 @@ static bool SimplifyTree(TreePatternNode *&N) {
       N->getExtType(0).isValueTypeByHwMode(false) &&
       N->getExtType(0) == N->getChild(0)->getExtType(0) &&
       N->getName().empty()) {
-    N = N->getChild(0);
+    N = N->getChildShared(0);
     SimplifyTree(N);
     return true;
   }
@@ -2733,9 +2778,9 @@ static bool SimplifyTree(TreePatternNode *&N) {
   // Walk all children.
   bool MadeChange = false;
   for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
-    TreePatternNode *Child = N->getChild(i);
+    TreePatternNodePtr Child = N->getChildShared(i);
     MadeChange |= SimplifyTree(Child);
-    N->setChild(i, Child);
+    N->setChild(i, std::move(Child));
   }
   return MadeChange;
 }
@@ -2753,7 +2798,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
-    for (TreePatternNode *&Tree : Trees) {
+    for (TreePatternNodePtr &Tree : Trees) {
       MadeChange |= Tree->ApplyTypeConstraints(*this, false);
       MadeChange |= SimplifyTree(Tree);
     }
@@ -2781,7 +2826,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
           // changing the type of the input register in this case.  This allows
           // us to match things like:
           //  def : Pat<(v1i64 (bitconvert(v2i32 DPR:$src))), (v1i64 DPR:$src)>;
-          if (Node == Trees[0] && Node->isLeaf()) {
+          if (Node == Trees[0].get() && Node->isLeaf()) {
             DefInit *DI = dyn_cast<DefInit>(Node->getLeafValue());
             if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
                        DI->getDef()->isSubClassOf("RegisterOperand")))
@@ -2812,7 +2857,7 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
   }
 
   bool HasUnresolvedTypes = false;
-  for (const TreePatternNode *Tree : Trees)
+  for (const TreePatternNodePtr &Tree : Trees)
     HasUnresolvedTypes |= Tree->ContainsUnresolvedType(*this);
   return !HasUnresolvedTypes;
 }
@@ -2829,7 +2874,7 @@ void TreePattern::print(raw_ostream &OS) const {
 
   if (Trees.size() > 1)
     OS << "[\n";
-  for (const TreePatternNode *Tree : Trees) {
+  for (const TreePatternNodePtr &Tree : Trees) {
     OS << "\t";
     Tree->print(OS);
     OS << "\n";
@@ -2933,17 +2978,17 @@ void CodeGenDAGPatterns::ParseComplexPatterns() {
 /// inside a pattern fragment to a pattern fragment.
 ///
 void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
-  std::vector<Record*> Fragments = Records.getAllDerivedDefinitions("PatFrag");
+  std::vector<Record*> Fragments = Records.getAllDerivedDefinitions("PatFrags");
 
   // First step, parse all of the fragments.
   for (Record *Frag : Fragments) {
     if (OutFrags != Frag->isSubClassOf("OutPatFrag"))
       continue;
 
-    DagInit *Tree = Frag->getValueAsDag("Fragment");
+    ListInit *LI = Frag->getValueAsListInit("Fragments");
     TreePattern *P =
         (PatternFragments[Frag] = llvm::make_unique<TreePattern>(
-             Frag, Tree, !Frag->isSubClassOf("OutPatFrag"),
+             Frag, LI, !Frag->isSubClassOf("OutPatFrag"),
              *this)).get();
 
     // Validate the argument list, converting it to set, to discard duplicates.
@@ -2991,13 +3036,15 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
     // this fragment uses it.
     TreePredicateFn PredFn(P);
     if (!PredFn.isAlwaysTrue())
-      P->getOnlyTree()->addPredicateFn(PredFn);
+      for (auto T : P->getTrees())
+        T->addPredicateFn(PredFn);
 
     // If there is a node transformation corresponding to this, keep track of
     // it.
     Record *Transform = Frag->getValueAsDef("OperandTransform");
     if (!getSDNodeTransform(Transform).second.empty())    // not noop xform?
-      P->getOnlyTree()->setTransformFn(Transform);
+      for (auto T : P->getTrees())
+        T->setTransformFn(Transform);
   }
 
   // Now that we've parsed all of the tree fragments, do a closure on them so
@@ -3010,12 +3057,18 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
     ThePat.InlinePatternFragments();
 
     // Infer as many types as possible.  Don't worry about it if we don't infer
-    // all of them, some may depend on the inputs of the pattern.
-    ThePat.InferAllTypes();
-    ThePat.resetError();
+    // all of them, some may depend on the inputs of the pattern.  Also, don't
+    // validate type sets; validation may cause spurious failures e.g. if a
+    // fragment needs floating-point types but the current target does not have
+    // any (this is only an error if that fragment is ever used!).
+    {
+      TypeInfer::SuppressValidation SV(ThePat.getInfer());
+      ThePat.InferAllTypes();
+      ThePat.resetError();
+    }
 
     // If debugging, print out the pattern fragment result.
-    DEBUG(ThePat.dump());
+    LLVM_DEBUG(ThePat.dump());
   }
 }
 
@@ -3045,9 +3098,9 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
     // Copy the operands over into a DAGDefaultOperand.
     DAGDefaultOperand DefaultOpInfo;
 
-    TreePatternNode *T = P.getTree(0);
+    const TreePatternNodePtr &T = P.getTree(0);
     for (unsigned op = 0, e = T->getNumChildren(); op != e; ++op) {
-      TreePatternNode *TPN = T->getChild(op);
+      TreePatternNodePtr TPN = T->getChildShared(op);
       while (TPN->ApplyTypeConstraints(P, false))
         /* Resolve all types */;
 
@@ -3056,7 +3109,7 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
                         DefaultOps[i]->getName() +
                         "' doesn't have a concrete type!");
       }
-      DefaultOpInfo.DefaultOps.push_back(TPN);
+      DefaultOpInfo.DefaultOps.push_back(std::move(TPN));
     }
 
     // Insert it into the DefaultOperands map so we can find it later.
@@ -3066,15 +3119,15 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
 
 /// HandleUse - Given "Pat" a leaf in the pattern, check to see if it is an
 /// instruction input.  Return true if this is a real use.
-static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
-                      std::map<std::string, TreePatternNode*> &InstInputs) {
+static bool HandleUse(TreePattern &I, TreePatternNodePtr Pat,
+                      std::map<std::string, TreePatternNodePtr> &InstInputs) {
   // No name -> not interesting.
   if (Pat->getName().empty()) {
     if (Pat->isLeaf()) {
       DefInit *DI = dyn_cast<DefInit>(Pat->getLeafValue());
       if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
                  DI->getDef()->isSubClassOf("RegisterOperand")))
-        I->error("Input " + DI->getDef()->getName() + " must be named!");
+        I.error("Input " + DI->getDef()->getName() + " must be named!");
     }
     return false;
   }
@@ -3082,7 +3135,8 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
   Record *Rec;
   if (Pat->isLeaf()) {
     DefInit *DI = dyn_cast<DefInit>(Pat->getLeafValue());
-    if (!DI) I->error("Input $" + Pat->getName() + " must be an identifier!");
+    if (!DI)
+      I.error("Input $" + Pat->getName() + " must be an identifier!");
     Rec = DI->getDef();
   } else {
     Rec = Pat->getOperator();
@@ -3092,7 +3146,7 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
   if (Rec->getName() == "srcvalue")
     return false;
 
-  TreePatternNode *&Slot = InstInputs[Pat->getName()];
+  TreePatternNodePtr &Slot = InstInputs[Pat->getName()];
   if (!Slot) {
     Slot = Pat;
     return true;
@@ -3107,24 +3161,38 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
 
   // Ensure that the inputs agree if we've already seen this input.
   if (Rec != SlotRec)
-    I->error("All $" + Pat->getName() + " inputs must agree with each other");
+    I.error("All $" + Pat->getName() + " inputs must agree with each other");
+  // Ensure that the types can agree as well.
+  Slot->UpdateNodeType(0, Pat->getExtType(0), I);
+  Pat->UpdateNodeType(0, Slot->getExtType(0), I);
   if (Slot->getExtTypes() != Pat->getExtTypes())
-    I->error("All $" + Pat->getName() + " inputs must agree with each other");
+    I.error("All $" + Pat->getName() + " inputs must agree with each other");
   return true;
 }
 
 /// FindPatternInputsAndOutputs - Scan the specified TreePatternNode (which is
 /// part of "I", the instruction), computing the set of inputs and outputs of
 /// the pattern.  Report errors if we see anything naughty.
-void CodeGenDAGPatterns::
-FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
-                            std::map<std::string, TreePatternNode*> &InstInputs,
-                            std::map<std::string, TreePatternNode*>&InstResults,
-                            std::vector<Record*> &InstImpResults) {
+void CodeGenDAGPatterns::FindPatternInputsAndOutputs(
+    TreePattern &I, TreePatternNodePtr Pat,
+    std::map<std::string, TreePatternNodePtr> &InstInputs,
+    std::map<std::string, TreePatternNodePtr> &InstResults,
+    std::vector<Record *> &InstImpResults) {
+
+  // The instruction pattern still has unresolved fragments.  For *named*
+  // nodes we must resolve those here.  This must not result in multiple
+  // alternatives.
+  if (!Pat->getName().empty()) {
+    TreePattern SrcPattern(I.getRecord(), Pat, true, *this);
+    SrcPattern.InlinePatternFragments();
+    SrcPattern.InferAllTypes();
+    Pat = SrcPattern.getOnlyTree();
+  }
+
   if (Pat->isLeaf()) {
     bool isUse = HandleUse(I, Pat, InstInputs);
     if (!isUse && Pat->getTransformFn())
-      I->error("Cannot specify a transform function for a non-input value!");
+      I.error("Cannot specify a transform function for a non-input value!");
     return;
   }
 
@@ -3132,11 +3200,11 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
     for (unsigned i = 0, e = Pat->getNumChildren(); i != e; ++i) {
       TreePatternNode *Dest = Pat->getChild(i);
       if (!Dest->isLeaf())
-        I->error("implicitly defined value should be a register!");
+        I.error("implicitly defined value should be a register!");
 
       DefInit *Val = dyn_cast<DefInit>(Dest->getLeafValue());
       if (!Val || !Val->getDef()->isSubClassOf("Register"))
-        I->error("implicitly defined value should be a register!");
+        I.error("implicitly defined value should be a register!");
       InstImpResults.push_back(Val->getDef());
     }
     return;
@@ -3147,9 +3215,9 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
     // and recurse.
     for (unsigned i = 0, e = Pat->getNumChildren(); i != e; ++i) {
       if (Pat->getChild(i)->getNumTypes() == 0)
-        I->error("Cannot have void nodes inside of patterns!");
-      FindPatternInputsAndOutputs(I, Pat->getChild(i), InstInputs, InstResults,
-                                  InstImpResults);
+        I.error("Cannot have void nodes inside of patterns!");
+      FindPatternInputsAndOutputs(I, Pat->getChildShared(i), InstInputs,
+                                  InstResults, InstImpResults);
     }
 
     // If this is a non-leaf node with no children, treat it basically as if
@@ -3157,27 +3225,33 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
     bool isUse = HandleUse(I, Pat, InstInputs);
 
     if (!isUse && Pat->getTransformFn())
-      I->error("Cannot specify a transform function for a non-input value!");
+      I.error("Cannot specify a transform function for a non-input value!");
     return;
   }
 
   // Otherwise, this is a set, validate and collect instruction results.
   if (Pat->getNumChildren() == 0)
-    I->error("set requires operands!");
+    I.error("set requires operands!");
 
   if (Pat->getTransformFn())
-    I->error("Cannot specify a transform function on a set node!");
+    I.error("Cannot specify a transform function on a set node!");
 
   // Check the set destinations.
   unsigned NumDests = Pat->getNumChildren()-1;
   for (unsigned i = 0; i != NumDests; ++i) {
-    TreePatternNode *Dest = Pat->getChild(i);
+    TreePatternNodePtr Dest = Pat->getChildShared(i);
+    // For set destinations we also must resolve fragments here.
+    TreePattern DestPattern(I.getRecord(), Dest, false, *this);
+    DestPattern.InlinePatternFragments();
+    DestPattern.InferAllTypes();
+    Dest = DestPattern.getOnlyTree();
+
     if (!Dest->isLeaf())
-      I->error("set destination should be a register!");
+      I.error("set destination should be a register!");
 
     DefInit *Val = dyn_cast<DefInit>(Dest->getLeafValue());
     if (!Val) {
-      I->error("set destination should be a register!");
+      I.error("set destination should be a register!");
       continue;
     }
 
@@ -3186,20 +3260,20 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
         Val->getDef()->isSubClassOf("RegisterOperand") ||
         Val->getDef()->isSubClassOf("PointerLikeRegClass")) {
       if (Dest->getName().empty())
-        I->error("set destination must have a name!");
+        I.error("set destination must have a name!");
       if (InstResults.count(Dest->getName()))
-        I->error("cannot set '" + Dest->getName() +"' multiple times");
+        I.error("cannot set '" + Dest->getName() + "' multiple times");
       InstResults[Dest->getName()] = Dest;
     } else if (Val->getDef()->isSubClassOf("Register")) {
       InstImpResults.push_back(Val->getDef());
     } else {
-      I->error("set destination should be a register!");
+      I.error("set destination should be a register!");
     }
   }
 
   // Verify and collect info from the computation.
-  FindPatternInputsAndOutputs(I, Pat->getChild(NumDests),
-                              InstInputs, InstResults, InstImpResults);
+  FindPatternInputsAndOutputs(I, Pat->getChildShared(NumDests), InstInputs,
+                              InstResults, InstImpResults);
 }
 
 //===----------------------------------------------------------------------===//
@@ -3214,18 +3288,17 @@ public:
   bool mayLoad;
   bool isBitcast;
   bool isVariadic;
+  bool hasChain;
 
   InstAnalyzer(const CodeGenDAGPatterns &cdp)
     : CDP(cdp), hasSideEffects(false), mayStore(false), mayLoad(false),
-      isBitcast(false), isVariadic(false) {}
-
-  void Analyze(const TreePattern *Pat) {
-    // Assume only the first tree is the pattern. The others are clobber nodes.
-    AnalyzeNode(Pat->getTree(0));
-  }
+      isBitcast(false), isVariadic(false), hasChain(false) {}
 
   void Analyze(const PatternToMatch &Pat) {
-    AnalyzeNode(Pat.getSrcPattern());
+    const TreePatternNode *N = Pat.getSrcPattern();
+    AnalyzeNode(N);
+    // These properties are detected only on the root node.
+    isBitcast = IsNodeBitcast(N);
   }
 
 private:
@@ -3233,20 +3306,12 @@ private:
     if (hasSideEffects || mayLoad || mayStore || isVariadic)
       return false;
 
-    if (N->getNumChildren() != 2)
+    if (N->isLeaf())
       return false;
-
-    const TreePatternNode *N0 = N->getChild(0);
-    if (!N0->isLeaf() || !isa<DefInit>(N0->getLeafValue()))
+    if (N->getNumChildren() != 1 || !N->getChild(0)->isLeaf())
       return false;
 
-    const TreePatternNode *N1 = N->getChild(1);
-    if (N1->isLeaf())
-      return false;
-    if (N1->getNumChildren() != 1 || !N1->getChild(0)->isLeaf())
-      return false;
-
-    const SDNodeInfo &OpInfo = CDP.getSDNodeInfo(N1->getOperator());
+    const SDNodeInfo &OpInfo = CDP.getSDNodeInfo(N->getOperator());
     if (OpInfo.getNumResults() != 1 || OpInfo.getNumOperands() != 1)
       return false;
     return OpInfo.getEnumName() == "ISD::BITCAST";
@@ -3272,17 +3337,12 @@ public:
     for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
       AnalyzeNode(N->getChild(i));
 
-    // Ignore set nodes, which are not SDNodes.
-    if (N->getOperator()->getName() == "set") {
-      isBitcast = IsNodeBitcast(N);
-      return;
-    }
-
     // Notice properties of the node.
     if (N->NodeHasProperty(SDNPMayStore, CDP)) mayStore = true;
     if (N->NodeHasProperty(SDNPMayLoad, CDP)) mayLoad = true;
     if (N->NodeHasProperty(SDNPSideEffect, CDP)) hasSideEffects = true;
     if (N->NodeHasProperty(SDNPVariadic, CDP)) isVariadic = true;
+    if (N->NodeHasProperty(SDNPHasChain, CDP)) hasChain = true;
 
     if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
       // If this is an intrinsic, analyze it.
@@ -3345,7 +3405,13 @@ static bool InferFromPattern(CodeGenInstruction &InstInfo,
   InstInfo.mayLoad |= PatInfo.mayLoad;
 
   // These flags are silently added without any verification.
-  InstInfo.isBitcast |= PatInfo.isBitcast;
+  // FIXME: To match historical behavior of TableGen, for now add those flags
+  // only when we're inferring from the primary instruction pattern.
+  if (PatDef->isSubClassOf("Instruction")) {
+    InstInfo.isBitcast |= PatInfo.isBitcast;
+    InstInfo.hasChain |= PatInfo.hasChain;
+    InstInfo.hasChain_Inferred = true;
+  }
 
   // Don't infer isVariadic. This flag means something different on SDNodes and
   // instructions. For example, a CALL SDNode is variadic because it has the
@@ -3416,37 +3482,30 @@ static bool checkOperandClass(CGIOperandList::OperandInfo &OI,
   return false;
 }
 
-const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
+void CodeGenDAGPatterns::parseInstructionPattern(
     CodeGenInstruction &CGI, ListInit *Pat, DAGInstMap &DAGInsts) {
 
   assert(!DAGInsts.count(CGI.TheDef) && "Instruction already parsed!");
 
   // Parse the instruction.
-  TreePattern *I = new TreePattern(CGI.TheDef, Pat, true, *this);
-  // Inline pattern fragments into it.
-  I->InlinePatternFragments();
-
-  // Infer as many types as possible.  If we cannot infer all of them, we can
-  // never do anything with this instruction pattern: report it to the user.
-  if (!I->InferAllTypes())
-    I->error("Could not infer all types in pattern!");
+  TreePattern I(CGI.TheDef, Pat, true, *this);
 
   // InstInputs - Keep track of all of the inputs of the instruction, along
   // with the record they are declared as.
-  std::map<std::string, TreePatternNode*> InstInputs;
+  std::map<std::string, TreePatternNodePtr> InstInputs;
 
   // InstResults - Keep track of all the virtual registers that are 'set'
   // in the instruction, including what reg class they are.
-  std::map<std::string, TreePatternNode*> InstResults;
+  std::map<std::string, TreePatternNodePtr> InstResults;
 
   std::vector<Record*> InstImpResults;
 
   // Verify that the top-level forms in the instruction are of void type, and
   // fill in the InstResults map.
   SmallString<32> TypesString;
-  for (unsigned j = 0, e = I->getNumTrees(); j != e; ++j) {
+  for (unsigned j = 0, e = I.getNumTrees(); j != e; ++j) {
     TypesString.clear();
-    TreePatternNode *Pat = I->getTree(j);
+    TreePatternNodePtr Pat = I.getTree(j);
     if (Pat->getNumTypes() != 0) {
       raw_svector_ostream OS(TypesString);
       for (unsigned k = 0, ke = Pat->getNumTypes(); k != ke; ++k) {
@@ -3454,7 +3513,7 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
           OS << ", ";
         Pat->getExtType(k).writeToStream(OS);
       }
-      I->error("Top-level forms in instruction pattern should have"
+      I.error("Top-level forms in instruction pattern should have"
                " void types, has types " +
                OS.str());
     }
@@ -3470,31 +3529,31 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
   unsigned NumResults = InstResults.size();
 
   // Parse the operands list from the (ops) list, validating it.
-  assert(I->getArgList().empty() && "Args list should still be empty here!");
+  assert(I.getArgList().empty() && "Args list should still be empty here!");
 
   // Check that all of the results occur first in the list.
   std::vector<Record*> Results;
-  SmallVector<TreePatternNode *, 2> ResNodes;
+  SmallVector<TreePatternNodePtr, 2> ResNodes;
   for (unsigned i = 0; i != NumResults; ++i) {
     if (i == CGI.Operands.size())
-      I->error("'" + InstResults.begin()->first +
+      I.error("'" + InstResults.begin()->first +
                "' set but does not appear in operand list!");
     const std::string &OpName = CGI.Operands[i].Name;
 
     // Check that it exists in InstResults.
-    TreePatternNode *RNode = InstResults[OpName];
+    TreePatternNodePtr RNode = InstResults[OpName];
     if (!RNode)
-      I->error("Operand $" + OpName + " does not exist in operand list!");
+      I.error("Operand $" + OpName + " does not exist in operand list!");
 
-    ResNodes.push_back(RNode);
 
     Record *R = cast<DefInit>(RNode->getLeafValue())->getDef();
+    ResNodes.push_back(std::move(RNode));
     if (!R)
-      I->error("Operand $" + OpName + " should be a set destination: all "
+      I.error("Operand $" + OpName + " should be a set destination: all "
                "outputs must occur before inputs in operand list!");
 
     if (!checkOperandClass(CGI.Operands[i], R))
-      I->error("Operand $" + OpName + " class mismatch!");
+      I.error("Operand $" + OpName + " class mismatch!");
 
     // Remember the return type.
     Results.push_back(CGI.Operands[i].Rec);
@@ -3503,19 +3562,16 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
     InstResults.erase(OpName);
   }
 
-  // Loop over the inputs next.  Make a copy of InstInputs so we can destroy
-  // the copy while we're checking the inputs.
-  std::map<std::string, TreePatternNode*> InstInputsCheck(InstInputs);
-
-  std::vector<TreePatternNode*> ResultNodeOperands;
+  // Loop over the inputs next.
+  std::vector<TreePatternNodePtr> ResultNodeOperands;
   std::vector<Record*> Operands;
   for (unsigned i = NumResults, e = CGI.Operands.size(); i != e; ++i) {
     CGIOperandList::OperandInfo &Op = CGI.Operands[i];
     const std::string &OpName = Op.Name;
     if (OpName.empty())
-      I->error("Operand #" + Twine(i) + " in operands list has no name!");
+      I.error("Operand #" + Twine(i) + " in operands list has no name!");
 
-    if (!InstInputsCheck.count(OpName)) {
+    if (!InstInputs.count(OpName)) {
       // If this is an operand with a DefaultOps set filled in, we can ignore
       // this.  When we codegen it, we will do so as always executed.
       if (Op.Rec->isSubClassOf("OperandWithDefaultOps")) {
@@ -3524,22 +3580,22 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
         if (!getDefaultOperand(Op.Rec).DefaultOps.empty())
           continue;
       }
-      I->error("Operand $" + OpName +
+      I.error("Operand $" + OpName +
                " does not appear in the instruction pattern");
     }
-    TreePatternNode *InVal = InstInputsCheck[OpName];
-    InstInputsCheck.erase(OpName);   // It occurred, remove from map.
+    TreePatternNodePtr InVal = InstInputs[OpName];
+    InstInputs.erase(OpName);   // It occurred, remove from map.
 
     if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
       Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
       if (!checkOperandClass(Op, InRec))
-        I->error("Operand $" + OpName + "'s register class disagrees"
+        I.error("Operand $" + OpName + "'s register class disagrees"
                  " between the operand and pattern");
     }
     Operands.push_back(Op.Rec);
 
     // Construct the result for the dest-pattern operand list.
-    TreePatternNode *OpNode = InVal->clone();
+    TreePatternNodePtr OpNode = InVal->clone();
 
     // No predicate is useful on the result.
     OpNode->clearPredicateFns();
@@ -3547,42 +3603,47 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern(
     // Promote the xform function to be an explicit node if set.
     if (Record *Xform = OpNode->getTransformFn()) {
       OpNode->setTransformFn(nullptr);
-      std::vector<TreePatternNode*> Children;
+      std::vector<TreePatternNodePtr> Children;
       Children.push_back(OpNode);
-      OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
+      OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children),
+                                                 OpNode->getNumTypes());
     }
 
-    ResultNodeOperands.push_back(OpNode);
+    ResultNodeOperands.push_back(std::move(OpNode));
   }
 
-  if (!InstInputsCheck.empty())
-    I->error("Input operand $" + InstInputsCheck.begin()->first +
-             " occurs in pattern but not in operands list!");
+  if (!InstInputs.empty())
+    I.error("Input operand $" + InstInputs.begin()->first +
+            " occurs in pattern but not in operands list!");
 
-  TreePatternNode *ResultPattern =
-    new TreePatternNode(I->getRecord(), ResultNodeOperands,
-                        GetNumNodeResults(I->getRecord(), *this));
+  TreePatternNodePtr ResultPattern = std::make_shared<TreePatternNode>(
+      I.getRecord(), std::move(ResultNodeOperands),
+      GetNumNodeResults(I.getRecord(), *this));
   // Copy fully inferred output node types to instruction result pattern.
   for (unsigned i = 0; i != NumResults; ++i) {
     assert(ResNodes[i]->getNumTypes() == 1 && "FIXME: Unhandled");
     ResultPattern->setType(i, ResNodes[i]->getExtType(0));
   }
 
+  // FIXME: Assume only the first tree is the pattern. The others are clobber
+  // nodes.
+  TreePatternNodePtr Pattern = I.getTree(0);
+  TreePatternNodePtr SrcPattern;
+  if (Pattern->getOperator()->getName() == "set") {
+    SrcPattern = Pattern->getChild(Pattern->getNumChildren()-1)->clone();
+  } else{
+    // Not a set (store or something?)
+    SrcPattern = Pattern;
+  }
+
   // Create and insert the instruction.
   // FIXME: InstImpResults should not be part of DAGInstruction.
-  DAGInstruction TheInst(I, Results, Operands, InstImpResults);
-  DAGInsts.insert(std::make_pair(I->getRecord(), TheInst));
-
-  // Use a temporary tree pattern to infer all types and make sure that the
-  // constructed result is correct.  This depends on the instruction already
-  // being inserted into the DAGInsts map.
-  TreePattern Temp(I->getRecord(), ResultPattern, false, *this);
-  Temp.InferAllTypes(&I->getNamedNodesMap());
+  Record *R = I.getRecord();
+  DAGInsts.emplace(std::piecewise_construct, std::forward_as_tuple(R),
+                   std::forward_as_tuple(Results, Operands, InstImpResults,
+                                         SrcPattern, ResultPattern));
 
-  DAGInstruction &TheInsertedInst = DAGInsts.find(I->getRecord())->second;
-  TheInsertedInst.setResultPattern(Temp.getOnlyTree());
-
-  return TheInsertedInst;
+  LLVM_DEBUG(I.dump());
 }
 
 /// ParseInstructions - Parse all of the instructions, inlining and resolving
@@ -3622,51 +3683,32 @@ void CodeGenDAGPatterns::ParseInstructions() {
       // Create and insert the instruction.
       std::vector<Record*> ImpResults;
       Instructions.insert(std::make_pair(Instr,
-                          DAGInstruction(nullptr, Results, Operands, ImpResults)));
+                            DAGInstruction(Results, Operands, ImpResults)));
       continue;  // no pattern.
     }
 
     CodeGenInstruction &CGI = Target.getInstruction(Instr);
-    const DAGInstruction &DI = parseInstructionPattern(CGI, LI, Instructions);
-
-    (void)DI;
-    DEBUG(DI.getPattern()->dump());
+    parseInstructionPattern(CGI, LI, Instructions);
   }
 
   // If we can, convert the instructions to be patterns that are matched!
   for (auto &Entry : Instructions) {
+    Record *Instr = Entry.first;
     DAGInstruction &TheInst = Entry.second;
-    TreePattern *I = TheInst.getPattern();
-    if (!I) continue;  // No pattern.
-
-    if (PatternRewriter)
-      PatternRewriter(I);
-    // FIXME: Assume only the first tree is the pattern. The others are clobber
-    // nodes.
-    TreePatternNode *Pattern = I->getTree(0);
-    TreePatternNode *SrcPattern;
-    if (Pattern->getOperator()->getName() == "set") {
-      SrcPattern = Pattern->getChild(Pattern->getNumChildren()-1)->clone();
-    } else{
-      // Not a set (store or something?)
-      SrcPattern = Pattern;
-    }
+    TreePatternNodePtr SrcPattern = TheInst.getSrcPattern();
+    TreePatternNodePtr ResultPattern = TheInst.getResultPattern();
 
-    Record *Instr = Entry.first;
-    ListInit *Preds = Instr->getValueAsListInit("Predicates");
-    int Complexity = Instr->getValueAsInt("AddedComplexity");
-    AddPatternToMatch(
-        I,
-        PatternToMatch(Instr, makePredList(Preds), SrcPattern,
-                       TheInst.getResultPattern(), TheInst.getImpResults(),
-                       Complexity, Instr->getID()));
+    if (SrcPattern && ResultPattern) {
+      TreePattern Pattern(Instr, SrcPattern, true, *this);
+      TreePattern Result(Instr, ResultPattern, false, *this);
+      ParseOnePattern(Instr, Pattern, Result, TheInst.getImpResults());
+    }
   }
 }
 
+typedef std::pair<TreePatternNode *, unsigned> NameRecord;
 
-typedef std::pair<const TreePatternNode*, unsigned> NameRecord;
-
-static void FindNames(const TreePatternNode *P,
+static void FindNames(TreePatternNode *P,
                       std::map<std::string, NameRecord> &Names,
                       TreePattern *PatternTop) {
   if (!P->getName().empty()) {
@@ -3695,7 +3737,7 @@ std::vector<Predicate> CodeGenDAGPatterns::makePredList(ListInit *L) {
   }
 
   // Sort so that different orders get canonicalized to the same string.
-  std::sort(Preds.begin(), Preds.end());
+  llvm::sort(Preds.begin(), Preds.end());
   return Preds;
 }
 
@@ -3739,34 +3781,18 @@ void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern,
         SrcNames[Entry.first].second == 1)
       Pattern->error("Pattern has dead named input: $" + Entry.first);
 
-  PatternsToMatch.push_back(std::move(PTM));
+  PatternsToMatch.push_back(PTM);
 }
 
 void CodeGenDAGPatterns::InferInstructionFlags() {
   ArrayRef<const CodeGenInstruction*> Instructions =
     Target.getInstructionsByEnumValue();
 
-  // First try to infer flags from the primary instruction pattern, if any.
-  SmallVector<CodeGenInstruction*, 8> Revisit;
   unsigned Errors = 0;
-  for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
-    CodeGenInstruction &InstInfo =
-      const_cast<CodeGenInstruction &>(*Instructions[i]);
-
-    // Get the primary instruction pattern.
-    const TreePattern *Pattern = getInstruction(InstInfo.TheDef).getPattern();
-    if (!Pattern) {
-      if (InstInfo.hasUndefFlags())
-        Revisit.push_back(&InstInfo);
-      continue;
-    }
-    InstAnalyzer PatInfo(*this);
-    PatInfo.Analyze(Pattern);
-    Errors += InferFromPattern(InstInfo, PatInfo, InstInfo.TheDef);
-  }
 
-  // Second, look for single-instruction patterns defined outside the
-  // instruction.
+  // Try to infer flags from all patterns in PatternToMatch.  These include
+  // both the primary instruction patterns (which always come first) and
+  // patterns defined outside the instruction.
   for (const PatternToMatch &PTM : ptms()) {
     // We can only infer from single-instruction patterns, otherwise we won't
     // know which instruction should get the flags.
@@ -3790,9 +3816,11 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
   if (Errors)
     PrintFatalError("pattern conflicts");
 
-  // Revisit instructions with undefined flags and no pattern.
+  // If requested by the target, guess any undefined properties.
   if (Target.guessInstructionProperties()) {
-    for (CodeGenInstruction *InstInfo : Revisit) {
+    for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
+      CodeGenInstruction *InstInfo =
+        const_cast<CodeGenInstruction *>(Instructions[i]);
       if (InstInfo->InferredFrom)
         continue;
       // The mayLoad and mayStore flags default to false.
@@ -3804,7 +3832,9 @@ void CodeGenDAGPatterns::InferInstructionFlags() {
   }
 
   // Complain about any flags that are still undefined.
-  for (CodeGenInstruction *InstInfo : Revisit) {
+  for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
+    CodeGenInstruction *InstInfo =
+      const_cast<CodeGenInstruction *>(Instructions[i]);
     if (InstInfo->InferredFrom)
       continue;
     if (InstInfo->hasSideEffects_Unset)
@@ -3916,6 +3946,122 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
   return false;
 }
 
+void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef,
+       TreePattern &Pattern, TreePattern &Result,
+       const std::vector<Record *> &InstImpResults) {
+
+  // Inline pattern fragments and expand multiple alternatives.
+  Pattern.InlinePatternFragments();
+  Result.InlinePatternFragments();
+
+  if (Result.getNumTrees() != 1)
+    Result.error("Cannot use multi-alternative fragments in result pattern!");
+
+  // Infer types.
+  bool IterateInference;
+  bool InferredAllPatternTypes, InferredAllResultTypes;
+  do {
+    // Infer as many types as possible.  If we cannot infer all of them, we
+    // can never do anything with this pattern: report it to the user.
+    InferredAllPatternTypes =
+        Pattern.InferAllTypes(&Pattern.getNamedNodesMap());
+
+    // Infer as many types as possible.  If we cannot infer all of them, we
+    // can never do anything with this pattern: report it to the user.
+    InferredAllResultTypes =
+        Result.InferAllTypes(&Pattern.getNamedNodesMap());
+
+    IterateInference = false;
+
+    // Apply the type of the result to the source pattern.  This helps us
+    // resolve cases where the input type is known to be a pointer type (which
+    // is considered resolved), but the result knows it needs to be 32- or
+    // 64-bits.  Infer the other way for good measure.
+    for (auto T : Pattern.getTrees())
+      for (unsigned i = 0, e = std::min(Result.getOnlyTree()->getNumTypes(),
+                                        T->getNumTypes());
+         i != e; ++i) {
+        IterateInference |= T->UpdateNodeType(
+            i, Result.getOnlyTree()->getExtType(i), Result);
+        IterateInference |= Result.getOnlyTree()->UpdateNodeType(
+            i, T->getExtType(i), Result);
+      }
+
+    // If our iteration has converged and the input pattern's types are fully
+    // resolved but the result pattern is not fully resolved, we may have a
+    // situation where we have two instructions in the result pattern and
+    // the instructions require a common register class, but don't care about
+    // what actual MVT is used.  This is actually a bug in our modelling:
+    // output patterns should have register classes, not MVTs.
+    //
+    // In any case, to handle this, we just go through and disambiguate some
+    // arbitrary types to the result pattern's nodes.
+    if (!IterateInference && InferredAllPatternTypes &&
+        !InferredAllResultTypes)
+      IterateInference =
+          ForceArbitraryInstResultType(Result.getTree(0).get(), Result);
+  } while (IterateInference);
+
+  // Verify that we inferred enough types that we can do something with the
+  // pattern and result.  If these fire the user has to add type casts.
+  if (!InferredAllPatternTypes)
+    Pattern.error("Could not infer all types in pattern!");
+  if (!InferredAllResultTypes) {
+    Pattern.dump();
+    Result.error("Could not infer all types in pattern result!");
+  }
+
+  // Promote the xform function to be an explicit node if set.
+  const TreePatternNodePtr &DstPattern = Result.getOnlyTree();
+  std::vector<TreePatternNodePtr> ResultNodeOperands;
+  for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
+    TreePatternNodePtr OpNode = DstPattern->getChildShared(ii);
+    if (Record *Xform = OpNode->getTransformFn()) {
+      OpNode->setTransformFn(nullptr);
+      std::vector<TreePatternNodePtr> Children;
+      Children.push_back(OpNode);
+      OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children),
+                                                 OpNode->getNumTypes());
+    }
+    ResultNodeOperands.push_back(OpNode);
+  }
+
+  TreePatternNodePtr DstShared =
+      DstPattern->isLeaf()
+          ? DstPattern
+          : std::make_shared<TreePatternNode>(DstPattern->getOperator(),
+                                              std::move(ResultNodeOperands),
+                                              DstPattern->getNumTypes());
+
+  for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
+    DstShared->setType(i, Result.getOnlyTree()->getExtType(i));
+
+  TreePattern Temp(Result.getRecord(), DstShared, false, *this);
+  Temp.InferAllTypes();
+
+  ListInit *Preds = TheDef->getValueAsListInit("Predicates");
+  int Complexity = TheDef->getValueAsInt("AddedComplexity");
+
+  if (PatternRewriter)
+    PatternRewriter(&Pattern);
+
+  // A pattern may end up with an "impossible" type, i.e. a situation
+  // where all types have been eliminated for some node in this pattern.
+  // This could occur for intrinsics that only make sense for a specific
+  // value type, and use a specific register class. If, for some mode,
+  // that register class does not accept that type, the type inference
+  // will lead to a contradiction, which is not an error however, but
+  // a sign that this pattern will simply never match.
+  if (Temp.getOnlyTree()->hasPossibleType())
+    for (auto T : Pattern.getTrees())
+      if (T->hasPossibleType())
+        AddPatternToMatch(&Pattern,
+                          PatternToMatch(TheDef, makePredList(Preds),
+                                         T, Temp.getOnlyTree(),
+                                         InstImpResults, Complexity,
+                                         TheDef->getID()));
+}
+
 void CodeGenDAGPatterns::ParsePatterns() {
   std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
 
@@ -3926,10 +4072,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
     if (hasNullFragReference(Tree))
       continue;
 
-    TreePattern *Pattern = new TreePattern(CurPattern, Tree, true, *this);
-
-    // Inline pattern fragments into it.
-    Pattern->InlinePatternFragments();
+    TreePattern Pattern(CurPattern, Tree, true, *this);
 
     ListInit *LI = CurPattern->getValueAsListInit("ResultInstrs");
     if (LI->empty()) continue;  // no pattern.
@@ -3937,119 +4080,19 @@ void CodeGenDAGPatterns::ParsePatterns() {
     // Parse the instruction.
     TreePattern Result(CurPattern, LI, false, *this);
 
-    // Inline pattern fragments into it.
-    Result.InlinePatternFragments();
-
     if (Result.getNumTrees() != 1)
       Result.error("Cannot handle instructions producing instructions "
                    "with temporaries yet!");
 
-    bool IterateInference;
-    bool InferredAllPatternTypes, InferredAllResultTypes;
-    do {
-      // Infer as many types as possible.  If we cannot infer all of them, we
-      // can never do anything with this pattern: report it to the user.
-      InferredAllPatternTypes =
-        Pattern->InferAllTypes(&Pattern->getNamedNodesMap());
-
-      // Infer as many types as possible.  If we cannot infer all of them, we
-      // can never do anything with this pattern: report it to the user.
-      InferredAllResultTypes =
-          Result.InferAllTypes(&Pattern->getNamedNodesMap());
-
-      IterateInference = false;
-
-      // Apply the type of the result to the source pattern.  This helps us
-      // resolve cases where the input type is known to be a pointer type (which
-      // is considered resolved), but the result knows it needs to be 32- or
-      // 64-bits.  Infer the other way for good measure.
-      for (unsigned i = 0, e = std::min(Result.getTree(0)->getNumTypes(),
-                                        Pattern->getTree(0)->getNumTypes());
-           i != e; ++i) {
-        IterateInference = Pattern->getTree(0)->UpdateNodeType(
-            i, Result.getTree(0)->getExtType(i), Result);
-        IterateInference |= Result.getTree(0)->UpdateNodeType(
-            i, Pattern->getTree(0)->getExtType(i), Result);
-      }
-
-      // If our iteration has converged and the input pattern's types are fully
-      // resolved but the result pattern is not fully resolved, we may have a
-      // situation where we have two instructions in the result pattern and
-      // the instructions require a common register class, but don't care about
-      // what actual MVT is used.  This is actually a bug in our modelling:
-      // output patterns should have register classes, not MVTs.
-      //
-      // In any case, to handle this, we just go through and disambiguate some
-      // arbitrary types to the result pattern's nodes.
-      if (!IterateInference && InferredAllPatternTypes &&
-          !InferredAllResultTypes)
-        IterateInference =
-            ForceArbitraryInstResultType(Result.getTree(0), Result);
-    } while (IterateInference);
-
-    // Verify that we inferred enough types that we can do something with the
-    // pattern and result.  If these fire the user has to add type casts.
-    if (!InferredAllPatternTypes)
-      Pattern->error("Could not infer all types in pattern!");
-    if (!InferredAllResultTypes) {
-      Pattern->dump();
-      Result.error("Could not infer all types in pattern result!");
-    }
-
     // Validate that the input pattern is correct.
-    std::map<std::string, TreePatternNode*> InstInputs;
-    std::map<std::string, TreePatternNode*> InstResults;
+    std::map<std::string, TreePatternNodePtr> InstInputs;
+    std::map<std::string, TreePatternNodePtr> InstResults;
     std::vector<Record*> InstImpResults;
-    for (unsigned j = 0, ee = Pattern->getNumTrees(); j != ee; ++j)
-      FindPatternInputsAndOutputs(Pattern, Pattern->getTree(j),
-                                  InstInputs, InstResults,
-                                  InstImpResults);
+    for (unsigned j = 0, ee = Pattern.getNumTrees(); j != ee; ++j)
+      FindPatternInputsAndOutputs(Pattern, Pattern.getTree(j), InstInputs,
+                                  InstResults, InstImpResults);
 
-    // Promote the xform function to be an explicit node if set.
-    TreePatternNode *DstPattern = Result.getOnlyTree();
-    std::vector<TreePatternNode*> ResultNodeOperands;
-    for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
-      TreePatternNode *OpNode = DstPattern->getChild(ii);
-      if (Record *Xform = OpNode->getTransformFn()) {
-        OpNode->setTransformFn(nullptr);
-        std::vector<TreePatternNode*> Children;
-        Children.push_back(OpNode);
-        OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
-      }
-      ResultNodeOperands.push_back(OpNode);
-    }
-    DstPattern = Result.getOnlyTree();
-    if (!DstPattern->isLeaf())
-      DstPattern = new TreePatternNode(DstPattern->getOperator(),
-                                       ResultNodeOperands,
-                                       DstPattern->getNumTypes());
-
-    for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
-      DstPattern->setType(i, Result.getOnlyTree()->getExtType(i));
-
-    TreePattern Temp(Result.getRecord(), DstPattern, false, *this);
-    Temp.InferAllTypes();
-
-    // A pattern may end up with an "impossible" type, i.e. a situation
-    // where all types have been eliminated for some node in this pattern.
-    // This could occur for intrinsics that only make sense for a specific
-    // value type, and use a specific register class. If, for some mode,
-    // that register class does not accept that type, the type inference
-    // will lead to a contradiction, which is not an error however, but
-    // a sign that this pattern will simply never match.
-    if (Pattern->getTree(0)->hasPossibleType() &&
-        Temp.getOnlyTree()->hasPossibleType()) {
-      ListInit *Preds = CurPattern->getValueAsListInit("Predicates");
-      int Complexity = CurPattern->getValueAsInt("AddedComplexity");
-      if (PatternRewriter)
-        PatternRewriter(Pattern);
-      AddPatternToMatch(
-          Pattern,
-          PatternToMatch(
-              CurPattern, makePredList(Preds), Pattern->getTree(0),
-              Temp.getOnlyTree(), std::move(InstImpResults), Complexity,
-              CurPattern->getID()));
-    }
+    ParseOnePattern(CurPattern, Pattern, Result, InstImpResults);
   }
 }
 
@@ -4068,25 +4111,24 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
   std::vector<PatternToMatch> Copy = PatternsToMatch;
   PatternsToMatch.clear();
 
-  auto AppendPattern = [this,&ModeChecks](PatternToMatch &P, unsigned Mode) {
-    TreePatternNode *NewSrc = P.SrcPattern->clone();
-    TreePatternNode *NewDst = P.DstPattern->clone();
+  auto AppendPattern = [this, &ModeChecks](PatternToMatch &P, unsigned Mode) {
+    TreePatternNodePtr NewSrc = P.SrcPattern->clone();
+    TreePatternNodePtr NewDst = P.DstPattern->clone();
     if (!NewSrc->setDefaultMode(Mode) || !NewDst->setDefaultMode(Mode)) {
-      delete NewSrc;
-      delete NewDst;
       return;
     }
 
     std::vector<Predicate> Preds = P.Predicates;
     const std::vector<Predicate> &MC = ModeChecks[Mode];
     Preds.insert(Preds.end(), MC.begin(), MC.end());
-    PatternsToMatch.emplace_back(P.getSrcRecord(), Preds, NewSrc, NewDst,
-                                 P.getDstRegs(), P.getAddedComplexity(),
-                                 Record::getNewUID(), Mode);
+    PatternsToMatch.emplace_back(P.getSrcRecord(), Preds, std::move(NewSrc),
+                                 std::move(NewDst), P.getDstRegs(),
+                                 P.getAddedComplexity(), Record::getNewUID(),
+                                 Mode);
   };
 
   for (PatternToMatch &P : Copy) {
-    TreePatternNode *SrcP = nullptr, *DstP = nullptr;
+    TreePatternNodePtr SrcP = nullptr, DstP = nullptr;
     if (P.SrcPattern->hasProperTypeByHwMode())
       SrcP = P.SrcPattern;
     if (P.DstPattern->hasProperTypeByHwMode())
@@ -4098,9 +4140,9 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
 
     std::set<unsigned> Modes;
     if (SrcP)
-      collectModes(Modes, SrcP);
+      collectModes(Modes, SrcP.get());
     if (DstP)
-      collectModes(Modes, DstP);
+      collectModes(Modes, DstP.get());
 
     // The predicate for the default mode needs to be constructed for each
     // pattern separately.
@@ -4168,13 +4210,13 @@ static void FindDepVars(TreePatternNode *N, MultipleUseVarSet &DepVars) {
 /// Dump the dependent variable set:
 static void DumpDepVars(MultipleUseVarSet &DepVars) {
   if (DepVars.empty()) {
-    DEBUG(errs() << "<empty set>");
+    LLVM_DEBUG(errs() << "<empty set>");
   } else {
-    DEBUG(errs() << "[ ");
+    LLVM_DEBUG(errs() << "[ ");
     for (const auto &DepVar : DepVars) {
-      DEBUG(errs() << DepVar.getKey() << " ");
+      LLVM_DEBUG(errs() << DepVar.getKey() << " ");
     }
-    DEBUG(errs() << "]");
+    LLVM_DEBUG(errs() << "]");
   }
 }
 #endif
@@ -4182,11 +4224,11 @@ static void DumpDepVars(MultipleUseVarSet &DepVars) {
 
 /// CombineChildVariants - Given a bunch of permutations of each child of the
 /// 'operator' node, put them together in all possible ways.
-static void CombineChildVariants(TreePatternNode *Orig,
-               const std::vector<std::vector<TreePatternNode*> > &ChildVariants,
-                                 std::vector<TreePatternNode*> &OutVariants,
-                                 CodeGenDAGPatterns &CDP,
-                                 const MultipleUseVarSet &DepVars) {
+static void CombineChildVariants(
+    TreePatternNodePtr Orig,
+    const std::vector<std::vector<TreePatternNodePtr>> &ChildVariants,
+    std::vector<TreePatternNodePtr> &OutVariants, CodeGenDAGPatterns &CDP,
+    const MultipleUseVarSet &DepVars) {
   // Make sure that each operand has at least one variant to choose from.
   for (const auto &Variants : ChildVariants)
     if (Variants.empty())
@@ -4198,20 +4240,20 @@ static void CombineChildVariants(TreePatternNode *Orig,
   bool NotDone;
   do {
 #ifndef NDEBUG
-    DEBUG(if (!Idxs.empty()) {
-            errs() << Orig->getOperator()->getName() << ": Idxs = [ ";
-              for (unsigned Idx : Idxs) {
-                errs() << Idx << " ";
-            }
-            errs() << "]\n";
-          });
+    LLVM_DEBUG(if (!Idxs.empty()) {
+      errs() << Orig->getOperator()->getName() << ": Idxs = [ ";
+      for (unsigned Idx : Idxs) {
+        errs() << Idx << " ";
+      }
+      errs() << "]\n";
+    });
 #endif
     // Create the variant and add it to the output list.
-    std::vector<TreePatternNode*> NewChildren;
+    std::vector<TreePatternNodePtr> NewChildren;
     for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
       NewChildren.push_back(ChildVariants[i][Idxs[i]]);
-    auto R = llvm::make_unique<TreePatternNode>(
-        Orig->getOperator(), NewChildren, Orig->getNumTypes());
+    TreePatternNodePtr R = std::make_shared<TreePatternNode>(
+        Orig->getOperator(), std::move(NewChildren), Orig->getNumTypes());
 
     // Copy over properties.
     R->setName(Orig->getName());
@@ -4227,10 +4269,10 @@ static void CombineChildVariants(TreePatternNode *Orig,
     //   (and GPRC:$a, GPRC:$b) -> (and GPRC:$b, GPRC:$a)
     // which are the same pattern.  Ignore the dups.
     if (R->canPatternMatch(ErrString, CDP) &&
-        none_of(OutVariants, [&](TreePatternNode *Variant) {
-          return R->isIsomorphicTo(Variant, DepVars);
+        none_of(OutVariants, [&](TreePatternNodePtr Variant) {
+          return R->isIsomorphicTo(Variant.get(), DepVars);
         }))
-      OutVariants.push_back(R.release());
+      OutVariants.push_back(R);
 
     // Increment indices to the next permutation by incrementing the
     // indices from last index backward, e.g., generate the sequence
@@ -4248,21 +4290,21 @@ static void CombineChildVariants(TreePatternNode *Orig,
 
 /// CombineChildVariants - A helper function for binary operators.
 ///
-static void CombineChildVariants(TreePatternNode *Orig,
-                                 const std::vector<TreePatternNode*> &LHS,
-                                 const std::vector<TreePatternNode*> &RHS,
-                                 std::vector<TreePatternNode*> &OutVariants,
+static void CombineChildVariants(TreePatternNodePtr Orig,
+                                 const std::vector<TreePatternNodePtr> &LHS,
+                                 const std::vector<TreePatternNodePtr> &RHS,
+                                 std::vector<TreePatternNodePtr> &OutVariants,
                                  CodeGenDAGPatterns &CDP,
                                  const MultipleUseVarSet &DepVars) {
-  std::vector<std::vector<TreePatternNode*> > ChildVariants;
+  std::vector<std::vector<TreePatternNodePtr>> ChildVariants;
   ChildVariants.push_back(LHS);
   ChildVariants.push_back(RHS);
   CombineChildVariants(Orig, ChildVariants, OutVariants, CDP, DepVars);
 }
 
-
-static void GatherChildrenOfAssociativeOpcode(TreePatternNode *N,
-                                     std::vector<TreePatternNode *> &Children) {
+static void
+GatherChildrenOfAssociativeOpcode(TreePatternNodePtr N,
+                                  std::vector<TreePatternNodePtr> &Children) {
   assert(N->getNumChildren()==2 &&"Associative but doesn't have 2 children!");
   Record *Operator = N->getOperator();
 
@@ -4274,21 +4316,21 @@ static void GatherChildrenOfAssociativeOpcode(TreePatternNode *N,
   }
 
   if (N->getChild(0)->isLeaf() || N->getChild(0)->getOperator() != Operator)
-    Children.push_back(N->getChild(0));
+    Children.push_back(N->getChildShared(0));
   else
-    GatherChildrenOfAssociativeOpcode(N->getChild(0), Children);
+    GatherChildrenOfAssociativeOpcode(N->getChildShared(0), Children);
 
   if (N->getChild(1)->isLeaf() || N->getChild(1)->getOperator() != Operator)
-    Children.push_back(N->getChild(1));
+    Children.push_back(N->getChildShared(1));
   else
-    GatherChildrenOfAssociativeOpcode(N->getChild(1), Children);
+    GatherChildrenOfAssociativeOpcode(N->getChildShared(1), Children);
 }
 
 /// GenerateVariantsOf - Given a pattern N, generate all permutations we can of
 /// the (potentially recursive) pattern by using algebraic laws.
 ///
-static void GenerateVariantsOf(TreePatternNode *N,
-                               std::vector<TreePatternNode*> &OutVariants,
+static void GenerateVariantsOf(TreePatternNodePtr N,
+                               std::vector<TreePatternNodePtr> &OutVariants,
                                CodeGenDAGPatterns &CDP,
                                const MultipleUseVarSet &DepVars) {
   // We cannot permute leaves or ComplexPattern uses.
@@ -4303,14 +4345,14 @@ static void GenerateVariantsOf(TreePatternNode *N,
   // If this node is associative, re-associate.
   if (NodeInfo.hasProperty(SDNPAssociative)) {
     // Re-associate by pulling together all of the linked operators
-    std::vector<TreePatternNode*> MaximalChildren;
+    std::vector<TreePatternNodePtr> MaximalChildren;
     GatherChildrenOfAssociativeOpcode(N, MaximalChildren);
 
     // Only handle child sizes of 3.  Otherwise we'll end up trying too many
     // permutations.
     if (MaximalChildren.size() == 3) {
       // Find the variants of all of our maximal children.
-      std::vector<TreePatternNode*> AVariants, BVariants, CVariants;
+      std::vector<TreePatternNodePtr> AVariants, BVariants, CVariants;
       GenerateVariantsOf(MaximalChildren[0], AVariants, CDP, DepVars);
       GenerateVariantsOf(MaximalChildren[1], BVariants, CDP, DepVars);
       GenerateVariantsOf(MaximalChildren[2], CVariants, CDP, DepVars);
@@ -4320,12 +4362,12 @@ static void GenerateVariantsOf(TreePatternNode *N,
       // Within these forms, we can also permute A/B/C.
 
       // Generate legal pair permutations of A/B/C.
-      std::vector<TreePatternNode*> ABVariants;
-      std::vector<TreePatternNode*> BAVariants;
-      std::vector<TreePatternNode*> ACVariants;
-      std::vector<TreePatternNode*> CAVariants;
-      std::vector<TreePatternNode*> BCVariants;
-      std::vector<TreePatternNode*> CBVariants;
+      std::vector<TreePatternNodePtr> ABVariants;
+      std::vector<TreePatternNodePtr> BAVariants;
+      std::vector<TreePatternNodePtr> ACVariants;
+      std::vector<TreePatternNodePtr> CAVariants;
+      std::vector<TreePatternNodePtr> BCVariants;
+      std::vector<TreePatternNodePtr> CBVariants;
       CombineChildVariants(N, AVariants, BVariants, ABVariants, CDP, DepVars);
       CombineChildVariants(N, BVariants, AVariants, BAVariants, CDP, DepVars);
       CombineChildVariants(N, AVariants, CVariants, ACVariants, CDP, DepVars);
@@ -4353,10 +4395,10 @@ static void GenerateVariantsOf(TreePatternNode *N,
   }
 
   // Compute permutations of all children.
-  std::vector<std::vector<TreePatternNode*> > ChildVariants;
+  std::vector<std::vector<TreePatternNodePtr>> ChildVariants;
   ChildVariants.resize(N->getNumChildren());
   for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
-    GenerateVariantsOf(N->getChild(i), ChildVariants[i], CDP, DepVars);
+    GenerateVariantsOf(N->getChildShared(i), ChildVariants[i], CDP, DepVars);
 
   // Build all permutations based on how the children were formed.
   CombineChildVariants(N, ChildVariants, OutVariants, CDP, DepVars);
@@ -4385,19 +4427,19 @@ static void GenerateVariantsOf(TreePatternNode *N,
       // after those.
       assert(NC >= 3 &&
              "Commutative intrinsic should have at least 3 children!");
-      std::vector<std::vector<TreePatternNode*> > Variants;
-      Variants.push_back(ChildVariants[0]); // Intrinsic id.
-      Variants.push_back(ChildVariants[2]);
-      Variants.push_back(ChildVariants[1]);
+      std::vector<std::vector<TreePatternNodePtr>> Variants;
+      Variants.push_back(std::move(ChildVariants[0])); // Intrinsic id.
+      Variants.push_back(std::move(ChildVariants[2]));
+      Variants.push_back(std::move(ChildVariants[1]));
       for (unsigned i = 3; i != NC; ++i)
-        Variants.push_back(ChildVariants[i]);
+        Variants.push_back(std::move(ChildVariants[i]));
       CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
     } else if (NC == N->getNumChildren()) {
-      std::vector<std::vector<TreePatternNode*> > Variants;
-      Variants.push_back(ChildVariants[1]);
-      Variants.push_back(ChildVariants[0]);
+      std::vector<std::vector<TreePatternNodePtr>> Variants;
+      Variants.push_back(std::move(ChildVariants[1]));
+      Variants.push_back(std::move(ChildVariants[0]));
       for (unsigned i = 2; i != NC; ++i)
-        Variants.push_back(ChildVariants[i]);
+        Variants.push_back(std::move(ChildVariants[i]));
       CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
     }
   }
@@ -4407,7 +4449,7 @@ static void GenerateVariantsOf(TreePatternNode *N,
 // GenerateVariants - Generate variants.  For example, commutative patterns can
 // match multiple ways.  Add them to PatternsToMatch as well.
 void CodeGenDAGPatterns::GenerateVariants() {
-  DEBUG(errs() << "Generating instruction variants.\n");
+  LLVM_DEBUG(errs() << "Generating instruction variants.\n");
 
   // Loop over all of the patterns we've collected, checking to see if we can
   // generate variants of the instruction, through the exploitation of
@@ -4420,28 +4462,26 @@ void CodeGenDAGPatterns::GenerateVariants() {
   //
   for (unsigned i = 0, e = PatternsToMatch.size(); i != e; ++i) {
     MultipleUseVarSet             DepVars;
-    std::vector<TreePatternNode*> Variants;
+    std::vector<TreePatternNodePtr> Variants;
     FindDepVars(PatternsToMatch[i].getSrcPattern(), DepVars);
-    DEBUG(errs() << "Dependent/multiply used variables: ");
-    DEBUG(DumpDepVars(DepVars));
-    DEBUG(errs() << "\n");
-    GenerateVariantsOf(PatternsToMatch[i].getSrcPattern(), Variants, *this,
-                       DepVars);
+    LLVM_DEBUG(errs() << "Dependent/multiply used variables: ");
+    LLVM_DEBUG(DumpDepVars(DepVars));
+    LLVM_DEBUG(errs() << "\n");
+    GenerateVariantsOf(PatternsToMatch[i].getSrcPatternShared(), Variants,
+                       *this, DepVars);
 
     assert(!Variants.empty() && "Must create at least original variant!");
     if (Variants.size() == 1)  // No additional variants for this pattern.
       continue;
 
-    DEBUG(errs() << "FOUND VARIANTS OF: ";
-          PatternsToMatch[i].getSrcPattern()->dump();
-          errs() << "\n");
+    LLVM_DEBUG(errs() << "FOUND VARIANTS OF: ";
+               PatternsToMatch[i].getSrcPattern()->dump(); errs() << "\n");
 
     for (unsigned v = 0, e = Variants.size(); v != e; ++v) {
-      TreePatternNode *Variant = Variants[v];
+      TreePatternNodePtr Variant = Variants[v];
 
-      DEBUG(errs() << "  VAR#" << v <<  ": ";
-            Variant->dump();
-            errs() << "\n");
+      LLVM_DEBUG(errs() << "  VAR#" << v << ": "; Variant->dump();
+                 errs() << "\n");
 
       // Scan to see if an instruction or explicit pattern already matches this.
       bool AlreadyExists = false;
@@ -4453,7 +4493,7 @@ void CodeGenDAGPatterns::GenerateVariants() {
         // Check to see if this variant already exists.
         if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(),
                                     DepVars)) {
-          DEBUG(errs() << "  *** ALREADY EXISTS, ignoring variant.\n");
+          LLVM_DEBUG(errs() << "  *** ALREADY EXISTS, ignoring variant.\n");
           AlreadyExists = true;
           break;
         }
@@ -4464,11 +4504,11 @@ void CodeGenDAGPatterns::GenerateVariants() {
       // Otherwise, add it to the list of patterns we have.
       PatternsToMatch.push_back(PatternToMatch(
           PatternsToMatch[i].getSrcRecord(), PatternsToMatch[i].getPredicates(),
-          Variant, PatternsToMatch[i].getDstPattern(),
+          Variant, PatternsToMatch[i].getDstPatternShared(),
           PatternsToMatch[i].getDstRegs(),
           PatternsToMatch[i].getAddedComplexity(), Record::getNewUID()));
     }
 
-    DEBUG(errs() << "\n");
+    LLVM_DEBUG(errs() << "\n");
   }
 }
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 8a8132c7f894..9be3816cc7fc 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -43,6 +43,9 @@ class TreePatternNode;
 class CodeGenDAGPatterns;
 class ComplexPattern;
 
+/// Shared pointer for TreePatternNode.
+using TreePatternNodePtr = std::shared_ptr<TreePatternNode>;
+
 /// This represents a set of MVTs. Since the underlying type for the MVT
 /// is uint8_t, there are at most 256 values. To reduce the number of memory
 /// allocations and deallocations, represent the set as a sequence of bits.
@@ -330,9 +333,26 @@ struct TypeInfer {
     TypeSetByHwMode &VTS;
   };
 
+  /// Scope guard that switches off type validation on a TypeInfer object.
+  /// The constructor records the current value of TypeInfer::Validate and
+  /// clears it; the destructor restores the recorded value, so nested guards
+  /// unwind correctly.
+  struct SuppressValidation {
+    explicit SuppressValidation(TypeInfer &TI)
+        : Infer(TI), SavedValidate(TI.Validate) {
+      Infer.Validate = false;
+    }
+    ~SuppressValidation() {
+      Infer.Validate = SavedValidate;
+    }
+    TypeInfer &Infer;    // Object whose Validate flag is being overridden.
+    bool SavedValidate;  // Value of Infer.Validate before this guard existed.
+  };
+
   TreePattern &TP;
   unsigned ForceMode;     // Mode to use when set.
   bool CodeGen = false;   // Set during generation of matcher code.
+  bool Validate = true;   // Indicate whether to validate types.
 
 private:
   TypeSetByHwMode getLegalTypes();
@@ -540,6 +555,10 @@ public:
   /// ValueType record for the memory VT.
   Record *getScalarMemoryVT() const;
 
+  // If true, indicates that GlobalISel-based C++ code was supplied.
+  bool hasGISelPredicateCode() const;
+  std::string getGISelPredicateCode() const;
+
 private:
   bool hasPredCode() const;
   bool hasImmCode() const;
@@ -552,9 +571,6 @@ private:
 };
 
 
-/// FIXME: TreePatternNode's can be shared in some cases (due to dag-shaped
-/// patterns), and as such should be ref counted.  We currently just leak all
-/// TreePatternNode objects!
 class TreePatternNode {
   /// The type of each node result.  Before and during type inference, each
   /// result may be a set of possible types.  After (successful) type inference,
@@ -581,18 +597,19 @@ class TreePatternNode {
   /// it can be substituted into the resulting instruction on a pattern match.
   Record *TransformFn;
 
-  std::vector<TreePatternNode*> Children;
+  std::vector<TreePatternNodePtr> Children;
+
 public:
-  TreePatternNode(Record *Op, const std::vector<TreePatternNode*> &Ch,
+  TreePatternNode(Record *Op, std::vector<TreePatternNodePtr> Ch,
                   unsigned NumResults)
-    : Operator(Op), Val(nullptr), TransformFn(nullptr), Children(Ch) {
+      : Operator(Op), Val(nullptr), TransformFn(nullptr),
+        Children(std::move(Ch)) {
     Types.resize(NumResults);
   }
   TreePatternNode(Init *val, unsigned NumResults)    // leaf ctor
     : Operator(nullptr), Val(val), TransformFn(nullptr) {
     Types.resize(NumResults);
   }
-  ~TreePatternNode();
 
   bool hasName() const { return !Name.empty(); }
   const std::string &getName() const { return Name; }
@@ -626,15 +643,23 @@ public:
   Record *getOperator() const { assert(!isLeaf()); return Operator; }
 
   unsigned getNumChildren() const { return Children.size(); }
-  TreePatternNode *getChild(unsigned N) const { return Children[N]; }
-  void setChild(unsigned i, TreePatternNode *N) {
-    Children[i] = N;
+  /// getChild - Borrow child N as a raw, non-owning pointer.
+  TreePatternNode *getChild(unsigned N) const { return Children[N].get(); }
+  /// getChildShared - Access the owning shared pointer stored for child N.
+  const TreePatternNodePtr &getChildShared(unsigned N) const {
+    return Children[N];
   }
+  /// setChild - Replace child i with N.  N is a by-value sink parameter, so
+  /// it is moved into the slot rather than copied a second time.
+  void setChild(unsigned i, TreePatternNodePtr N) {
+    Children[i] = std::move(N);
+  }
 
   /// hasChild - Return true if N is any of our children.
   bool hasChild(const TreePatternNode *N) const {
     for (unsigned i = 0, e = Children.size(); i != e; ++i)
-      if (Children[i] == N) return true;
+      if (Children[i].get() == N)
+        return true;
     return false;
   }
 
@@ -694,7 +713,7 @@ public:   // Higher level manipulation routines.
 
   /// clone - Return a new copy of this tree.
   ///
-  TreePatternNode *clone() const;
+  TreePatternNodePtr clone() const;
 
   /// RemoveAllTypes - Recursively strip all the types of this tree.
   void RemoveAllTypes();
@@ -708,13 +727,15 @@ public:   // Higher level manipulation routines.
 
   /// SubstituteFormalArguments - Replace the formal arguments in this tree
   /// with actual values specified by ArgMap.
-  void SubstituteFormalArguments(std::map<std::string,
-                                          TreePatternNode*> &ArgMap);
+  void
+  SubstituteFormalArguments(std::map<std::string, TreePatternNodePtr> &ArgMap);
 
   /// InlinePatternFragments - If this pattern refers to any pattern
-  /// fragments, inline them into place, giving us a pattern without any
-  /// PatFrag references.
-  TreePatternNode *InlinePatternFragments(TreePattern &TP);
+  /// fragments, return the set of inlined versions (this can be more than
+  /// one if a PatFrags record has multiple alternatives).
+  void InlinePatternFragments(TreePatternNodePtr T,
+                              TreePattern &TP,
+                              std::vector<TreePatternNodePtr> &OutAlternatives);
 
   /// ApplyTypeConstraints - Apply all of the type constraints relevant to
   /// this node and its children in the tree.  This returns true if it makes a
@@ -759,11 +780,11 @@ class TreePattern {
   /// Trees - The list of pattern trees which corresponds to this pattern.
   /// Note that PatFrag's only have a single tree.
   ///
-  std::vector<TreePatternNode*> Trees;
+  std::vector<TreePatternNodePtr> Trees;
 
   /// NamedNodes - This is all of the nodes that have names in the trees in this
   /// pattern.
-  StringMap<SmallVector<TreePatternNode*,1> > NamedNodes;
+  StringMap<SmallVector<TreePatternNode *, 1>> NamedNodes;
 
   /// TheRecord - The actual TableGen record corresponding to this pattern.
   ///
@@ -802,21 +823,25 @@ public:
               CodeGenDAGPatterns &ise);
   TreePattern(Record *TheRec, DagInit *Pat, bool isInput,
               CodeGenDAGPatterns &ise);
-  TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput,
+  TreePattern(Record *TheRec, TreePatternNodePtr Pat, bool isInput,
               CodeGenDAGPatterns &ise);
 
   /// getTrees - Return the tree patterns which corresponds to this pattern.
   ///
-  const std::vector<TreePatternNode*> &getTrees() const { return Trees; }
+  const std::vector<TreePatternNodePtr> &getTrees() const { return Trees; }
   unsigned getNumTrees() const { return Trees.size(); }
-  TreePatternNode *getTree(unsigned i) const { return Trees[i]; }
-  void setTree(unsigned i, TreePatternNode *Tree) { Trees[i] = Tree; }
-  TreePatternNode *getOnlyTree() const {
+  const TreePatternNodePtr &getTree(unsigned i) const { return Trees[i]; }
+  /// setTree - Replace tree i.  Tree is a by-value sink parameter, so it is
+  /// moved into the slot instead of being copied a second time.
+  void setTree(unsigned i, TreePatternNodePtr Tree) {
+    Trees[i] = std::move(Tree);
+  }
+  const TreePatternNodePtr &getOnlyTree() const {
     assert(Trees.size() == 1 && "Doesn't have exactly one pattern!");
     return Trees[0];
   }
 
-  const StringMap<SmallVector<TreePatternNode*,1> > &getNamedNodesMap() {
+  const StringMap<SmallVector<TreePatternNode *, 1>> &getNamedNodesMap() {
     if (NamedNodes.empty())
       ComputeNamedNodes();
     return NamedNodes;
@@ -838,17 +859,22 @@ public:
 
   /// InlinePatternFragments - If this pattern refers to any pattern
   /// fragments, inline them into place, giving us a pattern without any
-  /// PatFrag references.
+  /// PatFrags references.  This may increase the number of trees in the
+  /// pattern if a PatFrags has multiple alternatives.
   void InlinePatternFragments() {
-    for (unsigned i = 0, e = Trees.size(); i != e; ++i)
-      Trees[i] = Trees[i]->InlinePatternFragments(*this);
+    // Take over the current trees instead of copying the vector; Trees is
+    // then emptied and passed as the output vector for the inlined versions.
+    std::vector<TreePatternNodePtr> Pending = std::move(Trees);
+    Trees.clear();
+    for (const TreePatternNodePtr &Tree : Pending)
+      Tree->InlinePatternFragments(Tree, *this, Trees);
   }
 
   /// InferAllTypes - Infer/propagate as many types throughout the expression
   /// patterns as possible.  Return true if all types are inferred, false
   /// otherwise.  Bail out if a type contradiction is found.
-  bool InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> >
-                          *NamedTypes=nullptr);
+  bool InferAllTypes(
+      const StringMap<SmallVector<TreePatternNode *, 1>> *NamedTypes = nullptr);
 
   /// error - If this is the first error in the current resolution step,
   /// print it and set the error flag.  Otherwise, continue silently.
@@ -866,7 +890,7 @@ public:
   void dump() const;
 
 private:
-  TreePatternNode *ParseTreePattern(Init *DI, StringRef OpName);
+  TreePatternNodePtr ParseTreePattern(Init *DI, StringRef OpName);
   void ComputeNamedNodes();
   void ComputeNamedNodes(TreePatternNode *N);
 };
@@ -900,31 +924,34 @@ inline bool TreePatternNode::UpdateNodeType(unsigned ResNo,
 /// DAGDefaultOperand - One of these is created for each OperandWithDefaultOps
 /// that has a set ExecuteAlways / DefaultOps field.
 struct DAGDefaultOperand {
-  std::vector<TreePatternNode*> DefaultOps;
+  std::vector<TreePatternNodePtr> DefaultOps;
 };
 
 class DAGInstruction {
-  TreePattern *Pattern;
   std::vector<Record*> Results;
   std::vector<Record*> Operands;
   std::vector<Record*> ImpResults;
-  TreePatternNode *ResultPattern;
+  TreePatternNodePtr SrcPattern;
+  TreePatternNodePtr ResultPattern;
+
 public:
-  DAGInstruction(TreePattern *TP,
-                 const std::vector<Record*> &results,
+  /// Build an instruction description.  The source and result patterns are
+  /// optional and default to null; both are by-value sink parameters and are
+  /// moved into the corresponding members.
+  DAGInstruction(const std::vector<Record*> &results,
                  const std::vector<Record*> &operands,
-                 const std::vector<Record*> &impresults)
-    : Pattern(TP), Results(results), Operands(operands),
-      ImpResults(impresults), ResultPattern(nullptr) {}
+                 const std::vector<Record*> &impresults,
+                 TreePatternNodePtr srcpattern = nullptr,
+                 TreePatternNodePtr resultpattern = nullptr)
+    : Results(results), Operands(operands), ImpResults(impresults),
+      SrcPattern(std::move(srcpattern)),
+      ResultPattern(std::move(resultpattern)) {}
 
-  TreePattern *getPattern() const { return Pattern; }
   unsigned getNumResults() const { return Results.size(); }
   unsigned getNumOperands() const { return Operands.size(); }
   unsigned getNumImpResults() const { return ImpResults.size(); }
   const std::vector<Record*>& getImpResults() const { return ImpResults; }
 
-  void setResultPattern(TreePatternNode *R) { ResultPattern = R; }
-
   Record *getResult(unsigned RN) const {
     assert(RN < Results.size());
     return Results[RN];
@@ -940,7 +963,8 @@ public:
     return ImpResults[RN];
   }
 
-  TreePatternNode *getResultPattern() const { return ResultPattern; }
+  TreePatternNodePtr getSrcPattern() const { return SrcPattern; }
+  TreePatternNodePtr getResultPattern() const { return ResultPattern; }
 };
 
 /// This class represents a condition that has to be satisfied for a pattern
@@ -994,25 +1018,20 @@ public:
 /// processed to produce isel.
 class PatternToMatch {
 public:
-  PatternToMatch(Record *srcrecord, const std::vector<Predicate> &preds,
-                 TreePatternNode *src, TreePatternNode *dst,
-                 const std::vector<Record*> &dstregs,
-                 int complexity, unsigned uid, unsigned setmode = 0)
-    : SrcRecord(srcrecord), SrcPattern(src), DstPattern(dst),
-      Predicates(preds), Dstregs(std::move(dstregs)),
-      AddedComplexity(complexity), ID(uid), ForceMode(setmode) {}
-
-  PatternToMatch(Record *srcrecord, std::vector<Predicate> &&preds,
-                 TreePatternNode *src, TreePatternNode *dst,
-                 std::vector<Record*> &&dstregs,
-                 int complexity, unsigned uid, unsigned setmode = 0)
-    : SrcRecord(srcrecord), SrcPattern(src), DstPattern(dst),
-      Predicates(preds), Dstregs(std::move(dstregs)),
-      AddedComplexity(complexity), ID(uid), ForceMode(setmode) {}
+  /// All owning arguments (preds, src, dst, dstregs) are taken by value and
+  /// moved into the members, so callers may pass either lvalues or rvalues.
+  PatternToMatch(Record *srcrecord, std::vector<Predicate> preds,
+                 TreePatternNodePtr src, TreePatternNodePtr dst,
+                 std::vector<Record *> dstregs, int complexity,
+                 unsigned uid, unsigned setmode = 0)
+      : SrcRecord(srcrecord), SrcPattern(std::move(src)),
+        DstPattern(std::move(dst)), Predicates(std::move(preds)),
+        Dstregs(std::move(dstregs)), AddedComplexity(complexity), ID(uid),
+        ForceMode(setmode) {}
 
   Record          *SrcRecord;   // Originating Record for the pattern.
-  TreePatternNode *SrcPattern;  // Source pattern to match.
-  TreePatternNode *DstPattern;  // Resulting pattern.
+  TreePatternNodePtr SrcPattern;      // Source pattern to match.
+  TreePatternNodePtr DstPattern;      // Resulting pattern.
   std::vector<Predicate> Predicates;  // Top level predicate conditions
                                       // to match.
   std::vector<Record*> Dstregs; // Physical register defs being matched.
@@ -1021,8 +1037,10 @@ public:
   unsigned         ForceMode;   // Force this mode in type inference when set.
 
   Record          *getSrcRecord()  const { return SrcRecord; }
-  TreePatternNode *getSrcPattern() const { return SrcPattern; }
-  TreePatternNode *getDstPattern() const { return DstPattern; }
+  TreePatternNode *getSrcPattern() const { return SrcPattern.get(); }
+  TreePatternNodePtr getSrcPatternShared() const { return SrcPattern; }
+  TreePatternNode *getDstPattern() const { return DstPattern.get(); }
+  TreePatternNodePtr getDstPatternShared() const { return DstPattern; }
   const std::vector<Record*> &getDstRegs() const { return Dstregs; }
   int         getAddedComplexity() const { return AddedComplexity; }
   const std::vector<Predicate> &getPredicates() const { return Predicates; }
@@ -1156,7 +1174,7 @@ public:
 
   /// Parse the Pattern for an instruction, and insert the result in DAGInsts.
   typedef std::map<Record*, DAGInstruction, LessRecordByID> DAGInstMap;
-  const DAGInstruction &parseInstructionPattern(
+  void parseInstructionPattern(
       CodeGenInstruction &CGI, ListInit *Pattern,
       DAGInstMap &DAGInsts);
 
@@ -1193,13 +1211,15 @@ private:
 
   std::vector<Predicate> makePredList(ListInit *L);
 
+  void ParseOnePattern(Record *TheDef,
+                       TreePattern &Pattern, TreePattern &Result,
+                       const std::vector<Record *> &InstImpResults);
   void AddPatternToMatch(TreePattern *Pattern, PatternToMatch &&PTM);
-  void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
-                                   std::map<std::string,
-                                   TreePatternNode*> &InstInputs,
-                                   std::map<std::string,
-                                   TreePatternNode*> &InstResults,
-                                   std::vector<Record*> &InstImpResults);
+  void FindPatternInputsAndOutputs(
+      TreePattern &I, TreePatternNodePtr Pat,
+      std::map<std::string, TreePatternNodePtr> &InstInputs,
+      std::map<std::string, TreePatternNodePtr> &InstResults,
+      std::vector<Record *> &InstImpResults);
 };
 
 
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 44ee16f6fd74..eb35020d3d3a 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -306,11 +306,13 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
   isIndirectBranch = R->getValueAsBit("isIndirectBranch");
   isCompare    = R->getValueAsBit("isCompare");
   isMoveImm    = R->getValueAsBit("isMoveImm");
+  isMoveReg    = R->getValueAsBit("isMoveReg");
   isBitcast    = R->getValueAsBit("isBitcast");
   isSelect     = R->getValueAsBit("isSelect");
   isBarrier    = R->getValueAsBit("isBarrier");
   isCall       = R->getValueAsBit("isCall");
   isAdd        = R->getValueAsBit("isAdd");
+  isTrap       = R->getValueAsBit("isTrap");
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
   isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable");
   isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
@@ -327,6 +329,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
   isInsertSubreg = R->getValueAsBit("isInsertSubreg");
   isConvergent = R->getValueAsBit("isConvergent");
   hasNoSchedulingInfo = R->getValueAsBit("hasNoSchedulingInfo");
+  FastISelShouldIgnore = R->getValueAsBit("FastISelShouldIgnore");
 
   bool Unset;
   mayLoad      = R->getValueAsBitOrUnset("mayLoad", Unset);
@@ -344,6 +347,10 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
   ImplicitDefs = R->getValueAsListOfDefs("Defs");
   ImplicitUses = R->getValueAsListOfDefs("Uses");
 
+  // This flag is only inferred from the pattern.
+  hasChain = false;
+  hasChain_Inferred = false;
+
   // Parse Constraints.
   ParseConstraints(R->getValueAsString("Constraints"), Operands);
 
@@ -588,12 +595,10 @@ unsigned CodeGenInstAlias::ResultOperand::getMINumOperands() const {
   return MIOpInfo->getNumArgs();
 }
 
-CodeGenInstAlias::CodeGenInstAlias(Record *R, unsigned Variant,
-                                   CodeGenTarget &T)
+CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T)
     : TheDef(R) {
   Result = R->getValueAsDag("ResultInst");
   AsmString = R->getValueAsString("AsmString");
-  AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, Variant);
 
 
   // Verify that the root of the result is an instruction.
@@ -630,8 +635,14 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, unsigned Variant,
     // of a complex operand, in which case we include them anyways, as we
     // don't have any other way to specify the whole operand.
     if (ResultInst->Operands[i].MINumOperands == 1 &&
-        ResultInst->Operands[i].getTiedRegister() != -1)
-      continue;
+        ResultInst->Operands[i].getTiedRegister() != -1) {
+      // Tied operands of different RegisterClass should be explicit within an
+      // instruction's syntax and so cannot be skipped.
+      int TiedOpNum = ResultInst->Operands[i].getTiedRegister();
+      if (ResultInst->Operands[i].Rec->getName() ==
+          ResultInst->Operands[TiedOpNum].Rec->getName())
+        continue;
+    }
 
     if (AliasOpNo >= Result->getNumArgs())
       PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 9cff95b1247f..a50c3e60e6e7 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -15,7 +15,7 @@
 #define LLVM_UTILS_TABLEGEN_CODEGENINSTRUCTION_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/SMLoc.h"
 #include <string>
 #include <utility>
@@ -226,11 +226,13 @@ template <typename T> class ArrayRef;
     bool isIndirectBranch : 1;
     bool isCompare : 1;
     bool isMoveImm : 1;
+    bool isMoveReg : 1;
     bool isBitcast : 1;
     bool isSelect : 1;
     bool isBarrier : 1;
     bool isCall : 1;
     bool isAdd : 1;
+    bool isTrap : 1;
     bool canFoldAsLoad : 1;
     bool mayLoad : 1;
     bool mayLoad_Unset : 1;
@@ -258,6 +260,9 @@ template <typename T> class ArrayRef;
     bool isInsertSubreg : 1;
     bool isConvergent : 1;
     bool hasNoSchedulingInfo : 1;
+    bool FastISelShouldIgnore : 1;
+    bool hasChain : 1;
+    bool hasChain_Inferred : 1;
 
     std::string DeprecatedReason;
     bool HasComplexDeprecationPredicate;
@@ -350,7 +355,7 @@ template <typename T> class ArrayRef;
     /// of them are matched by the operand, the second value should be -1.
     std::vector<std::pair<unsigned, int> > ResultInstOperandIndex;
 
-    CodeGenInstAlias(Record *R, unsigned Variant, CodeGenTarget &T);
+    CodeGenInstAlias(Record *R, CodeGenTarget &T);
 
     bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
                          Record *InstOpRec, bool hasSubOps, ArrayRef<SMLoc> Loc,
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 91305034dc24..5d0715959120 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -15,7 +15,7 @@
 #define LLVM_UTILS_TABLEGEN_CODEGENINTRINSICS_H
 
 #include "SDNodeProperties.h"
-#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/MachineValueType.h"
 #include <string>
 #include <vector>
 
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index 43348b622a74..e5b0426cdcc3 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -243,7 +243,12 @@ void MapTableEmitter::buildRowInstrMap() {
     std::vector<Init*> KeyValue;
     ListInit *RowFields = InstrMapDesc.getRowFields();
     for (Init *RowField : RowFields->getValues()) {
-      Init *CurInstrVal = CurInstr->getValue(RowField)->getValue();
+      RecordVal *RecVal = CurInstr->getValue(RowField);
+      if (RecVal == nullptr)
+        PrintFatalError(CurInstr->getLoc(), "No value " +
+                        RowField->getAsString() + " found in \"" +
+                        CurInstr->getName() + "\" instruction description.");
+      Init *CurInstrVal = RecVal->getValue();
       KeyValue.push_back(CurInstrVal);
     }
 
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index a6b0a4beb8ea..b0d13b7d38f3 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -21,7 +21,6 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -52,7 +51,7 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
-  : TheDef(R), EnumValue(Enum), AllSuperRegsCovered(true) {
+  : TheDef(R), EnumValue(Enum), AllSuperRegsCovered(true), Artificial(true) {
   Name = R->getName();
   if (R->getValue("Namespace"))
     Namespace = R->getValueAsString("Namespace");
@@ -63,7 +62,7 @@ CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
 CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
                                        unsigned Enum)
   : TheDef(nullptr), Name(N), Namespace(Nspace), Size(-1), Offset(-1),
-    EnumValue(Enum), AllSuperRegsCovered(true) {
+    EnumValue(Enum), AllSuperRegsCovered(true), Artificial(true) {
 }
 
 std::string CodeGenSubRegIndex::getQualifiedName() const {
@@ -162,8 +161,9 @@ CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum)
     HasDisjunctSubRegs(false),
     SubRegsComplete(false),
     SuperRegsComplete(false),
-    TopoSig(~0u)
-{}
+    TopoSig(~0u) {
+  Artificial = R->getValueAsBit("isArtificial");
+}
 
 void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) {
   std::vector<Record*> SRIs = TheDef->getValueAsListOfDefs("SubRegIndices");
@@ -276,6 +276,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) {
   for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {
     CodeGenRegister *SR = ExplicitSubRegs[i];
     CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i];
+    if (!SR->Artificial)
+      Idx->Artificial = false;
     if (!SubRegs.insert(std::make_pair(Idx, SR)).second)
       PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
                       " appears twice in Register " + getName());
@@ -386,13 +388,17 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) {
   // user already specified.
   for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {
     CodeGenRegister *SR = ExplicitSubRegs[i];
-    if (!SR->CoveredBySubRegs || SR->ExplicitSubRegs.size() <= 1)
+    if (!SR->CoveredBySubRegs || SR->ExplicitSubRegs.size() <= 1 ||
+        SR->Artificial)
       continue;
 
     // SR is composed of multiple sub-regs. Find their names in this register.
     SmallVector<CodeGenSubRegIndex*, 8> Parts;
-    for (unsigned j = 0, e = SR->ExplicitSubRegs.size(); j != e; ++j)
-      Parts.push_back(getSubRegIndex(SR->ExplicitSubRegs[j]));
+    for (unsigned j = 0, e = SR->ExplicitSubRegs.size(); j != e; ++j) {
+      CodeGenSubRegIndex &I = *SR->ExplicitSubRegIndices[j];
+      if (!I.Artificial)
+        Parts.push_back(getSubRegIndex(SR->ExplicitSubRegs[j]));
+    }
 
     // Offer this as an existing spelling for the concatenation of Parts.
     CodeGenSubRegIndex &Idx = *ExplicitSubRegIndices[i];
@@ -602,6 +608,13 @@ unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const {
 namespace {
 
 struct TupleExpander : SetTheory::Expander {
+  // Reference to SynthDefs in the containing CodeGenRegBank, to keep track of
+  // the synthesized definitions for their lifetime.
+  std::vector<std::unique_ptr<Record>> &SynthDefs;
+
+  TupleExpander(std::vector<std::unique_ptr<Record>> &SynthDefs)
+      : SynthDefs(SynthDefs) {}
+
   void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) override {
     std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
     unsigned Dim = Indices.size();
@@ -646,7 +659,9 @@ struct TupleExpander : SetTheory::Expander {
       // Create a new Record representing the synthesized register. This record
       // is only for consumption by CodeGenRegister, it is not added to the
       // RecordKeeper.
-      Record *NewReg = new Record(Name, Def->getLoc(), Def->getRecords());
+      SynthDefs.emplace_back(
+          llvm::make_unique<Record>(Name, Def->getLoc(), Def->getRecords()));
+      Record *NewReg = SynthDefs.back().get();
       Elts.insert(NewReg);
 
       // Copy Proto super-classes.
@@ -710,7 +725,7 @@ struct TupleExpander : SetTheory::Expander {
 //===----------------------------------------------------------------------===//
 
 static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
-  std::sort(M.begin(), M.end(), deref<llvm::less>());
+  llvm::sort(M.begin(), M.end(), deref<llvm::less>());
   M.erase(std::unique(M.begin(), M.end(), deref<llvm::equal>()), M.end());
 }
 
@@ -736,10 +751,12 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
   Orders.resize(1 + AltOrders->size());
 
   // Default allocation order always contains all registers.
+  Artificial = true;
   for (unsigned i = 0, e = Elements->size(); i != e; ++i) {
     Orders[0].push_back((*Elements)[i]);
     const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]);
     Members.push_back(Reg);
+    Artificial &= Reg->Artificial;
     TopoSigs.set(Reg->getTopoSig());
   }
   sortAndUniqueRegisters(Members);
@@ -798,8 +815,11 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
     CopyCost(0),
     Allocatable(true),
     AllocationPriority(0) {
-  for (const auto R : Members)
+  Artificial = true;
+  for (const auto R : Members) {
     TopoSigs.set(R->getTopoSig());
+    Artificial &= R->Artificial;
+  }
 }
 
 // Compute inherited propertied for a synthesized register class.
@@ -915,6 +935,8 @@ void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) {
     CodeGenRegisterClass &RC = *I;
     RC.SubClasses.resize(RegClasses.size());
     RC.SubClasses.set(RC.EnumValue);
+    if (RC.Artificial)
+      continue;
 
     // Normally, all subclasses have IDs >= rci, unless RC is part of a clique.
     for (auto I2 = I.base(), E2 = RegClasses.end(); I2 != E2; ++I2) {
@@ -975,7 +997,7 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
   for (auto &RC : RegClasses)
     if (SuperRegRCsBV[RC.EnumValue])
       SuperRegRCs.emplace_back(&RC);
-  std::sort(SuperRegRCs.begin(), SuperRegRCs.end(), SizeOrder);
+  llvm::sort(SuperRegRCs.begin(), SuperRegRCs.end(), SizeOrder);
   assert(SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first");
 
   // Find all the subreg classes and order them by size too.
@@ -986,11 +1008,11 @@ CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
     if (SuperRegClassesBV.any())
       SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV));
   }
-  std::sort(SuperRegClasses.begin(), SuperRegClasses.end(),
-            [&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
-                const std::pair<CodeGenRegisterClass *, BitVector> &B) {
-              return SizeOrder(A.first, B.first);
-            });
+  llvm::sort(SuperRegClasses.begin(), SuperRegClasses.end(),
+             [&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
+                 const std::pair<CodeGenRegisterClass *, BitVector> &B) {
+               return SizeOrder(A.first, B.first);
+             });
 
   // Find the biggest subclass and subreg class such that R:subidx is in the
   // subreg class for all R in subclass.
@@ -1043,12 +1065,15 @@ void CodeGenRegisterClass::getSuperRegClasses(const CodeGenSubRegIndex *SubIdx,
 }
 
 // Populate a unique sorted list of units from a register set.
-void CodeGenRegisterClass::buildRegUnitSet(
+void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank,
   std::vector<unsigned> &RegUnits) const {
   std::vector<unsigned> TmpUnits;
-  for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI)
-    TmpUnits.push_back(*UnitI);
-  std::sort(TmpUnits.begin(), TmpUnits.end());
+  for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI) {
+    const RegUnit &RU = RegBank.getRegUnit(*UnitI);
+    if (!RU.Artificial)
+      TmpUnits.push_back(*UnitI);
+  }
+  llvm::sort(TmpUnits.begin(), TmpUnits.end());
   std::unique_copy(TmpUnits.begin(), TmpUnits.end(),
                    std::back_inserter(RegUnits));
 }
@@ -1062,12 +1087,13 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
   // Configure register Sets to understand register classes and tuples.
   Sets.addFieldExpander("RegisterClass", "MemberList");
   Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
-  Sets.addExpander("RegisterTuples", llvm::make_unique<TupleExpander>());
+  Sets.addExpander("RegisterTuples",
+                   llvm::make_unique<TupleExpander>(SynthDefs));
 
   // Read in the user-defined (named) sub-register indices.
   // More indices will be synthesized later.
   std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
-  std::sort(SRIs.begin(), SRIs.end(), LessRecord());
+  llvm::sort(SRIs.begin(), SRIs.end(), LessRecord());
   for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
     getSubRegIdx(SRIs[i]);
   // Build composite maps from ComposedOf fields.
@@ -1076,7 +1102,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
 
   // Read in the register definitions.
   std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
-  std::sort(Regs.begin(), Regs.end(), LessRecordRegister());
+  llvm::sort(Regs.begin(), Regs.end(), LessRecordRegister());
   // Assign the enumeration values.
   for (unsigned i = 0, e = Regs.size(); i != e; ++i)
     getReg(Regs[i]);
@@ -1087,7 +1113,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
 
   for (Record *R : Tups) {
     std::vector<Record *> TupRegs = *Sets.expand(R);
-    std::sort(TupRegs.begin(), TupRegs.end(), LessRecordRegister());
+    llvm::sort(TupRegs.begin(), TupRegs.end(), LessRecordRegister());
     for (Record *RC : TupRegs)
       getReg(RC);
   }
@@ -1131,6 +1157,18 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
   for (auto &Reg : Registers)
     Reg.computeSuperRegs(*this);
 
+  // For each pair of Reg:SR, if both are non-artificial, mark the
+  // corresponding sub-register index as non-artificial.
+  for (auto &Reg : Registers) {
+    if (Reg.Artificial)
+      continue;
+    for (auto P : Reg.getSubRegs()) {
+      const CodeGenRegister *SR = P.second;
+      if (!SR->Artificial)
+        P.first->Artificial = false;
+    }
+  }
+
   // Native register units are associated with a leaf register. They've all been
   // discovered now.
   NumNativeRegUnits = RegUnits.size();
@@ -1141,9 +1179,11 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
     PrintFatalError("No 'RegisterClass' subclasses defined!");
 
   // Allocate user-defined register classes.
-  for (auto *RC : RCs) {
-    RegClasses.emplace_back(*this, RC);
-    addToMaps(&RegClasses.back());
+  for (auto *R : RCs) {
+    RegClasses.emplace_back(*this, R);
+    CodeGenRegisterClass &RC = RegClasses.back();
+    if (!RC.Artificial)
+      addToMaps(&RC);
   }
 
   // Infer missing classes to create a full algebra.
@@ -1554,21 +1594,24 @@ static void computeUberWeights(std::vector<UberRegSet> &UberSets,
         Reg = UnitI.getReg();
         Weight = 0;
       }
-      unsigned UWeight = RegBank.getRegUnit(*UnitI).Weight;
-      if (!UWeight) {
-        UWeight = 1;
-        RegBank.increaseRegUnitWeight(*UnitI, UWeight);
+      if (!RegBank.getRegUnit(*UnitI).Artificial) {
+        unsigned UWeight = RegBank.getRegUnit(*UnitI).Weight;
+        if (!UWeight) {
+          UWeight = 1;
+          RegBank.increaseRegUnitWeight(*UnitI, UWeight);
+        }
+        Weight += UWeight;
       }
-      Weight += UWeight;
     }
     if (Weight > MaxWeight)
       MaxWeight = Weight;
     if (I->Weight != MaxWeight) {
-      DEBUG(
-        dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight;
-        for (auto &Unit : I->Regs)
-          dbgs() << " " << Unit->getName();
-        dbgs() << "\n");
+      LLVM_DEBUG(dbgs() << "UberSet " << I - UberSets.begin() << " Weight "
+                        << MaxWeight;
+                 for (auto &Unit
+                      : I->Regs) dbgs()
+                 << " " << Unit->getName();
+                 dbgs() << "\n");
       // Update the set weight.
       I->Weight = MaxWeight;
     }
@@ -1595,9 +1638,10 @@ static void computeUberWeights(std::vector<UberRegSet> &UberSets,
 static bool normalizeWeight(CodeGenRegister *Reg,
                             std::vector<UberRegSet> &UberSets,
                             std::vector<UberRegSet*> &RegSets,
-                            SparseBitVector<> &NormalRegs,
+                            BitVector &NormalRegs,
                             CodeGenRegister::RegUnitList &NormalUnits,
                             CodeGenRegBank &RegBank) {
+  NormalRegs.resize(std::max(Reg->EnumValue + 1, NormalRegs.size()));
   if (NormalRegs.test(Reg->EnumValue))
     return false;
   NormalRegs.set(Reg->EnumValue);
@@ -1637,7 +1681,8 @@ static bool normalizeWeight(CodeGenRegister *Reg,
     }
     else {
       // Adjust the existing single unit.
-      RegBank.increaseRegUnitWeight(AdjustUnit, UberSet->Weight - RegWeight);
+      if (!RegBank.getRegUnit(AdjustUnit).Artificial)
+        RegBank.increaseRegUnitWeight(AdjustUnit, UberSet->Weight - RegWeight);
       // The unit may be shared among sets and registers within this set.
       computeUberWeights(UberSets, RegBank);
     }
@@ -1670,7 +1715,7 @@ void CodeGenRegBank::computeRegUnitWeights() {
     Changed = false;
     for (auto &Reg : Registers) {
       CodeGenRegister::RegUnitList NormalUnits;
-      SparseBitVector<> NormalRegs;
+      BitVector NormalRegs;
       Changed |= normalizeWeight(&Reg, UberSets, RegSets, NormalRegs,
                                  NormalUnits, *this);
     }
@@ -1734,8 +1779,8 @@ void CodeGenRegBank::pruneUnitSets() {
           && (SubSet.Units.size() + 3 > SuperSet.Units.size())
           && UnitWeight == RegUnits[SuperSet.Units[0]].Weight
           && UnitWeight == RegUnits[SuperSet.Units.back()].Weight) {
-        DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx
-              << "\n");
+        LLVM_DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx
+                          << "\n");
         // We can pick any of the set names for the merged set. Go for the
         // shortest one to avoid picking the name of one of the classes that are
         // artificially created by tablegen. So "FPR128_lo" instead of
@@ -1771,7 +1816,7 @@ void CodeGenRegBank::computeRegUnitSets() {
   // Compute a unique RegUnitSet for each RegClass.
   auto &RegClasses = getRegClasses();
   for (auto &RC : RegClasses) {
-    if (!RC.Allocatable)
+    if (!RC.Allocatable || RC.Artificial)
       continue;
 
     // Speculatively grow the RegUnitSets to hold the new set.
@@ -1779,7 +1824,7 @@ void CodeGenRegBank::computeRegUnitSets() {
     RegUnitSets.back().Name = RC.getName();
 
     // Compute a sorted list of units in this class.
-    RC.buildRegUnitSet(RegUnitSets.back().Units);
+    RC.buildRegUnitSet(*this, RegUnitSets.back().Units);
 
     // Find an existing RegUnitSet.
     std::vector<RegUnitSet>::const_iterator SetI =
@@ -1788,29 +1833,26 @@ void CodeGenRegBank::computeRegUnitSets() {
       RegUnitSets.pop_back();
   }
 
-  DEBUG(dbgs() << "\nBefore pruning:\n";
-        for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
-             USIdx < USEnd; ++USIdx) {
-          dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name
-                 << ":";
-          for (auto &U : RegUnitSets[USIdx].Units)
-            printRegUnitName(U);
-          dbgs() << "\n";
-        });
+  LLVM_DEBUG(dbgs() << "\nBefore pruning:\n"; for (unsigned USIdx = 0,
+                                                   USEnd = RegUnitSets.size();
+                                                   USIdx < USEnd; ++USIdx) {
+    dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";
+    for (auto &U : RegUnitSets[USIdx].Units)
+      printRegUnitName(U);
+    dbgs() << "\n";
+  });
 
   // Iteratively prune unit sets.
   pruneUnitSets();
 
-  DEBUG(dbgs() << "\nBefore union:\n";
-        for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
-             USIdx < USEnd; ++USIdx) {
-          dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name
-                 << ":";
-          for (auto &U : RegUnitSets[USIdx].Units)
-            printRegUnitName(U);
-          dbgs() << "\n";
-        }
-        dbgs() << "\nUnion sets:\n");
+  LLVM_DEBUG(dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0,
+                                                 USEnd = RegUnitSets.size();
+                                                 USIdx < USEnd; ++USIdx) {
+    dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";
+    for (auto &U : RegUnitSets[USIdx].Units)
+      printRegUnitName(U);
+    dbgs() << "\n";
+  } dbgs() << "\nUnion sets:\n");
 
   // Iterate over all unit sets, including new ones added by this loop.
   unsigned NumRegUnitSubSets = RegUnitSets.size();
@@ -1850,11 +1892,11 @@ void CodeGenRegBank::computeRegUnitSets() {
       if (SetI != std::prev(RegUnitSets.end()))
         RegUnitSets.pop_back();
       else {
-        DEBUG(dbgs() << "UnitSet " << RegUnitSets.size()-1
-              << " " << RegUnitSets.back().Name << ":";
-              for (auto &U : RegUnitSets.back().Units)
-                printRegUnitName(U);
-              dbgs() << "\n";);
+        LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() - 1 << " "
+                          << RegUnitSets.back().Name << ":";
+                   for (auto &U
+                        : RegUnitSets.back().Units) printRegUnitName(U);
+                   dbgs() << "\n";);
       }
     }
   }
@@ -1862,15 +1904,14 @@ void CodeGenRegBank::computeRegUnitSets() {
   // Iteratively prune unit sets after inferring supersets.
   pruneUnitSets();
 
-  DEBUG(dbgs() << "\n";
-        for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
-             USIdx < USEnd; ++USIdx) {
-          dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name
-                 << ":";
-          for (auto &U : RegUnitSets[USIdx].Units)
-            printRegUnitName(U);
-          dbgs() << "\n";
-        });
+  LLVM_DEBUG(
+      dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
+                           USIdx < USEnd; ++USIdx) {
+        dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";
+        for (auto &U : RegUnitSets[USIdx].Units)
+          printRegUnitName(U);
+        dbgs() << "\n";
+      });
 
   // For each register class, list the UnitSets that are supersets.
   RegClassUnitSets.resize(RegClasses.size());
@@ -1882,26 +1923,26 @@ void CodeGenRegBank::computeRegUnitSets() {
 
     // Recompute the sorted list of units in this class.
     std::vector<unsigned> RCRegUnits;
-    RC.buildRegUnitSet(RCRegUnits);
+    RC.buildRegUnitSet(*this, RCRegUnits);
 
     // Don't increase pressure for unallocatable regclasses.
     if (RCRegUnits.empty())
       continue;
 
-    DEBUG(dbgs() << "RC " << RC.getName() << " Units: \n";
-          for (auto U : RCRegUnits)
-            printRegUnitName(U);
-          dbgs() << "\n  UnitSetIDs:");
+    LLVM_DEBUG(dbgs() << "RC " << RC.getName() << " Units: \n";
+               for (auto U
+                    : RCRegUnits) printRegUnitName(U);
+               dbgs() << "\n  UnitSetIDs:");
 
     // Find all supersets.
     for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
          USIdx != USEnd; ++USIdx) {
       if (isRegUnitSubSet(RCRegUnits, RegUnitSets[USIdx].Units)) {
-        DEBUG(dbgs() << " " << USIdx);
+        LLVM_DEBUG(dbgs() << " " << USIdx);
         RegClassUnitSets[RCIdx].push_back(USIdx);
       }
     }
-    DEBUG(dbgs() << "\n");
+    LLVM_DEBUG(dbgs() << "\n");
     assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
   }
 
@@ -2069,10 +2110,14 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
   // Compute the set of registers supporting each SubRegIndex.
   SubReg2SetMap SRSets;
   for (const auto R : RC->getMembers()) {
+    if (R->Artificial)
+      continue;
     const CodeGenRegister::SubRegMap &SRM = R->getSubRegs();
     for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
-         E = SRM.end(); I != E; ++I)
-      SRSets[I->first].push_back(R);
+         E = SRM.end(); I != E; ++I) {
+      if (!I->first->Artificial)
+        SRSets[I->first].push_back(R);
+    }
   }
 
   for (auto I : SRSets)
@@ -2081,6 +2126,8 @@ void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
   // Find matching classes for all SRSets entries.  Iterate in SubRegIndex
   // numerical order to visit synthetic indices last.
   for (const auto &SubIdx : SubRegIndices) {
+    if (SubIdx.Artificial)
+      continue;
     SubReg2SetMap::const_iterator I = SRSets.find(&SubIdx);
     // Unsupported SubRegIndex. Skip it.
     if (I == SRSets.end())
@@ -2137,6 +2184,8 @@ void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC,
     for (auto I = FirstSubRegRC, E = std::prev(RegClasses.end());
          I != std::next(E); ++I) {
       CodeGenRegisterClass &SubRC = *I;
+      if (SubRC.Artificial)
+        continue;
       // Topological shortcut: SubRC members have the wrong shape.
       if (!TopoSigs.anyCommon(SubRC.getTopoSigs()))
         continue;
@@ -2182,6 +2231,8 @@ void CodeGenRegBank::computeInferredRegisterClasses() {
   // Watch out for iterator invalidation here.
   for (auto I = RegClasses.begin(), E = RegClasses.end(); I != E; ++I) {
     CodeGenRegisterClass *RC = &*I;
+    if (RC->Artificial)
+      continue;
 
     // Synthesize answers for getSubClassWithSubReg().
     inferSubClassWithSubReg(RC);
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index f2f1e6971af9..32aa33c80b3a 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -19,16 +19,16 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/SetTheory.h"
 #include <cassert>
@@ -80,6 +80,10 @@ namespace llvm {
     // Are all super-registers containing this SubRegIndex covered by their
     // sub-registers?
     bool AllSuperRegsCovered;
+    // A subregister index is "artificial" if every subregister obtained
+    // from applying this index is artificial. Artificial subregister
+    // indexes are not used to create new register classes.
+    bool Artificial;
 
     CodeGenSubRegIndex(Record *R, unsigned Enum);
     CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum);
@@ -150,6 +154,7 @@ namespace llvm {
     unsigned CostPerUse;
     bool CoveredBySubRegs;
     bool HasDisjunctSubRegs;
+    bool Artificial;
 
     // Map SubRegIndex -> Register.
     typedef std::map<CodeGenSubRegIndex *, CodeGenRegister *, deref<llvm::less>>
@@ -331,6 +336,8 @@ namespace llvm {
     /// True if there are at least 2 subregisters which do not interfere.
     bool HasDisjunctSubRegs;
     bool CoveredBySubRegs;
+    /// A register class is artificial if all its members are artificial.
+    bool Artificial;
 
     // Return the Record that defined this class, or NULL if the class was
     // created by TableGen.
@@ -427,7 +434,8 @@ namespace llvm {
     const BitVector &getTopoSigs() const { return TopoSigs; }
 
     // Populate a unique sorted list of units from a register set.
-    void buildRegUnitSet(std::vector<unsigned> &RegUnits) const;
+    void buildRegUnitSet(const CodeGenRegBank &RegBank,
+                         std::vector<unsigned> &RegUnits) const;
 
     CodeGenRegisterClass(CodeGenRegBank&, Record *R);
 
@@ -475,8 +483,11 @@ namespace llvm {
     // Index into RegClassUnitSets where we can find the list of UnitSets that
     // contain this unit.
     unsigned RegClassUnitSetsIdx;
+    // A register unit is artificial if at least one of its roots is
+    // artificial.
+    bool Artificial;
 
-    RegUnit() : Weight(0), RegClassUnitSetsIdx(0) {
+    RegUnit() : Weight(0), RegClassUnitSetsIdx(0), Artificial(false) {
       Roots[0] = Roots[1] = nullptr;
     }
 
@@ -551,6 +562,9 @@ namespace llvm {
     // Give each register unit set an order based on sorting criteria.
     std::vector<unsigned> RegUnitSetOrder;
 
+    // Keep track of synthesized definitions generated in TupleExpander.
+    std::vector<std::unique_ptr<Record>> SynthDefs;
+
     // Add RC to *2RC maps.
     void addToMaps(CodeGenRegisterClass*);
 
@@ -648,8 +662,12 @@ namespace llvm {
     // registers.
     unsigned newRegUnit(CodeGenRegister *R0, CodeGenRegister *R1 = nullptr) {
       RegUnits.resize(RegUnits.size() + 1);
-      RegUnits.back().Roots[0] = R0;
-      RegUnits.back().Roots[1] = R1;
+      RegUnit &RU = RegUnits.back();
+      RU.Roots[0] = R0;
+      RU.Roots[1] = R1;
+      RU.Artificial = R0->Artificial;
+      if (R1)
+        RU.Artificial |= R1->Artificial;
       return RegUnits.size() - 1;
     }
 
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index b753e19a5443..9331fadf4099 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -12,17 +12,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenInstruction.h"
 #include "CodeGenSchedule.h"
+#include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include <algorithm>
 #include <iterator>
@@ -50,38 +51,113 @@ struct InstrsOp : public SetTheory::Operator {
 };
 
 // (instregex "OpcPat",...) Find all instructions matching an opcode pattern.
-//
-// TODO: Since this is a prefix match, perform a binary search over the
-// instruction names using lower_bound. Note that the predefined instrs must be
-// scanned linearly first. However, this is only safe if the regex pattern has
-// no top-level bars. The DAG already has a list of patterns, so there's no
-// reason to use top-level bars, but we need a way to verify they don't exist
-// before implementing the optimization.
 struct InstRegexOp : public SetTheory::Operator {
   const CodeGenTarget &Target;
   InstRegexOp(const CodeGenTarget &t): Target(t) {}
 
+  /// Remove any text inside of parentheses from S.
+  static std::string removeParens(llvm::StringRef S) {
+    std::string Result;
+    unsigned Paren = 0;
+    // NB: We don't care about escaped parens here.
+    for (char C : S) {
+      switch (C) {
+      case '(':
+        ++Paren;
+        break;
+      case ')':
+        --Paren;
+        break;
+      default:
+        if (Paren == 0)
+          Result += C;
+      }
+    }
+    return Result;
+  }
+
   void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
              ArrayRef<SMLoc> Loc) override {
-    SmallVector<Regex, 4> RegexList;
+    ArrayRef<const CodeGenInstruction *> Instructions =
+        Target.getInstructionsByEnumValue();
+
+    unsigned NumGeneric = Target.getNumFixedInstructions();
+    unsigned NumPseudos = Target.getNumPseudoInstructions();
+    auto Generics = Instructions.slice(0, NumGeneric);
+    auto Pseudos = Instructions.slice(NumGeneric, NumPseudos);
+    auto NonPseudos = Instructions.slice(NumGeneric + NumPseudos);
+
     for (Init *Arg : make_range(Expr->arg_begin(), Expr->arg_end())) {
       StringInit *SI = dyn_cast<StringInit>(Arg);
       if (!SI)
-        PrintFatalError(Loc, "instregex requires pattern string: "
-          + Expr->getAsString());
-      std::string pat = SI->getValue();
-      // Implement a python-style prefix match.
-      if (pat[0] != '^') {
-        pat.insert(0, "^(");
-        pat.insert(pat.end(), ')');
+        PrintFatalError(Loc, "instregex requires pattern string: " +
+                                 Expr->getAsString());
+      StringRef Original = SI->getValue();
+
+      // Extract a prefix that we can binary search on.
+      static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
+      auto FirstMeta = Original.find_first_of(RegexMetachars);
+
+      // Look for top-level | or ?. We cannot optimize them to binary search.
+      if (removeParens(Original).find_first_of("|?") != std::string::npos)
+        FirstMeta = 0;
+
+      Optional<Regex> Regexpr = None;
+      StringRef Prefix = Original.substr(0, FirstMeta);
+      StringRef PatStr = Original.substr(FirstMeta);
+      if (!PatStr.empty()) {
+        // For the rest use a python-style prefix match.
+        std::string pat = PatStr;
+        if (pat[0] != '^') {
+          pat.insert(0, "^(");
+          pat.insert(pat.end(), ')');
+        }
+        Regexpr = Regex(pat);
       }
-      RegexList.push_back(Regex(pat));
-    }
-    for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
-      for (auto &R : RegexList) {
-        if (R.match(Inst->TheDef->getName()))
+
+      int NumMatches = 0;
+
+      // The generic opcodes are unsorted, handle them manually.
+      for (auto *Inst : Generics) {
+        StringRef InstName = Inst->TheDef->getName();
+        if (InstName.startswith(Prefix) &&
+            (!Regexpr || Regexpr->match(InstName.substr(Prefix.size())))) {
           Elts.insert(Inst->TheDef);
+          NumMatches++;
+        }
       }
+
+      // Target instructions are split into two ranges: pseudo instructions
+      // first, then non-pseudos. Each range is in lexicographical order
+      // sorted by name. Find the sub-ranges that start with our prefix.
+      struct Comp {
+        bool operator()(const CodeGenInstruction *LHS, StringRef RHS) {
+          return LHS->TheDef->getName() < RHS;
+        }
+        bool operator()(StringRef LHS, const CodeGenInstruction *RHS) {
+          return LHS < RHS->TheDef->getName() &&
+                 !RHS->TheDef->getName().startswith(LHS);
+        }
+      };
+      auto Range1 =
+          std::equal_range(Pseudos.begin(), Pseudos.end(), Prefix, Comp());
+      auto Range2 = std::equal_range(NonPseudos.begin(), NonPseudos.end(),
+                                     Prefix, Comp());
+
+      // For these ranges we know that instruction names start with the prefix.
+      // Only the remaining regex, if there is one, still has to be matched.
+      const auto HandleNonGeneric = [&](const CodeGenInstruction *Inst) {
+        StringRef InstName = Inst->TheDef->getName();
+        if (!Regexpr || Regexpr->match(InstName.substr(Prefix.size()))) {
+          Elts.insert(Inst->TheDef);
+          NumMatches++;
+        }
+      };
+      std::for_each(Range1.first, Range1.second, HandleNonGeneric);
+      std::for_each(Range2.first, Range2.second, HandleNonGeneric);
+
+      if (0 == NumMatches)
+        PrintFatalError(Loc, "instregex has no matches: " + Original);
     }
   }
 };
@@ -139,16 +215,49 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
 
   // Populate each CodeGenProcModel's WriteResDefs, ReadAdvanceDefs, and
   // ProcResourceDefs.
-  DEBUG(dbgs() << "\n+++ RESOURCE DEFINITIONS (collectProcResources) +++\n");
+  LLVM_DEBUG(
+      dbgs() << "\n+++ RESOURCE DEFINITIONS (collectProcResources) +++\n");
   collectProcResources();
 
+  // Collect optional processor description.
+  collectOptionalProcessorInfo();
+
+  checkCompleteness();
+}
+
+void CodeGenSchedModels::collectRetireControlUnits() {
+  RecVec Units = Records.getAllDerivedDefinitions("RetireControlUnit");
+
+  for (Record *RCU : Units) {
+    CodeGenProcModel &PM = getProcModel(RCU->getValueAsDef("SchedModel"));
+    if (PM.RetireControlUnit) {
+      PrintError(RCU->getLoc(),
+                 "Expected a single RetireControlUnit definition");
+      PrintNote(PM.RetireControlUnit->getLoc(),
+                "Previous definition of RetireControlUnit was here");
+    }
+    PM.RetireControlUnit = RCU;
+  }
+}
+
+/// Collect optional processor information.
+void CodeGenSchedModels::collectOptionalProcessorInfo() {
+  // Find register file definitions for each processor.
+  collectRegisterFiles();
+
+  // Collect processor RetireControlUnit descriptors if available.
+  collectRetireControlUnits();
+
+  // Find pfm counter definitions for each processor.
+  collectPfmCounters();
+
   checkCompleteness();
 }
 
 /// Gather all processor models.
 void CodeGenSchedModels::collectProcModels() {
   RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
-  std::sort(ProcRecords.begin(), ProcRecords.end(), LessRecordFieldName());
+  llvm::sort(ProcRecords.begin(), ProcRecords.end(), LessRecordFieldName());
 
   // Reserve space because we can. Reallocation would be ok.
   ProcModels.reserve(ProcRecords.size()+1);
@@ -160,7 +269,7 @@ void CodeGenSchedModels::collectProcModels() {
   ProcModelMap[NoModelDef] = 0;
 
   // For each processor, find a unique machine model.
-  DEBUG(dbgs() << "+++ PROCESSOR MODELs (addProcModel) +++\n");
+  LLVM_DEBUG(dbgs() << "+++ PROCESSOR MODELs (addProcModel) +++\n");
   for (Record *ProcRecord : ProcRecords)
     addProcModel(ProcRecord);
 }
@@ -184,7 +293,7 @@ void CodeGenSchedModels::addProcModel(Record *ProcDef) {
     ProcModels.emplace_back(ProcModels.size(), Name,
                             ProcDef->getValueAsDef("SchedModel"), ModelKey);
   }
-  DEBUG(ProcModels.back().dump());
+  LLVM_DEBUG(ProcModels.back().dump());
 }
 
 // Recursively find all reachable SchedReadWrite records.
@@ -267,7 +376,7 @@ void CodeGenSchedModels::collectSchedRW() {
   // Find all ReadWrites referenced by SchedAlias. AliasDefs needs to be sorted
   // for the loop below that initializes Alias vectors.
   RecVec AliasDefs = Records.getAllDerivedDefinitions("SchedAlias");
-  std::sort(AliasDefs.begin(), AliasDefs.end(), LessRecord());
+  llvm::sort(AliasDefs.begin(), AliasDefs.end(), LessRecord());
   for (Record *ADef : AliasDefs) {
     Record *MatchDef = ADef->getValueAsDef("MatchRW");
     Record *AliasDef = ADef->getValueAsDef("AliasRW");
@@ -285,12 +394,12 @@ void CodeGenSchedModels::collectSchedRW() {
   }
   // Sort and add the SchedReadWrites directly referenced by instructions or
   // itinerary resources. Index reads and writes in separate domains.
-  std::sort(SWDefs.begin(), SWDefs.end(), LessRecord());
+  llvm::sort(SWDefs.begin(), SWDefs.end(), LessRecord());
   for (Record *SWDef : SWDefs) {
     assert(!getSchedRWIdx(SWDef, /*IsRead=*/false) && "duplicate SchedWrite");
     SchedWrites.emplace_back(SchedWrites.size(), SWDef);
   }
-  std::sort(SRDefs.begin(), SRDefs.end(), LessRecord());
+  llvm::sort(SRDefs.begin(), SRDefs.end(), LessRecord());
   for (Record *SRDef : SRDefs) {
     assert(!getSchedRWIdx(SRDef, /*IsRead-*/true) && "duplicate SchedWrite");
     SchedReads.emplace_back(SchedReads.size(), SRDef);
@@ -312,26 +421,26 @@ void CodeGenSchedModels::collectSchedRW() {
       PrintFatalError(ADef->getLoc(), "Cannot Alias an Alias");
     RW.Aliases.push_back(ADef);
   }
-  DEBUG(
-    dbgs() << "\n+++ SCHED READS and WRITES (collectSchedRW) +++\n";
-    for (unsigned WIdx = 0, WEnd = SchedWrites.size(); WIdx != WEnd; ++WIdx) {
-      dbgs() << WIdx << ": ";
-      SchedWrites[WIdx].dump();
-      dbgs() << '\n';
-    }
-    for (unsigned RIdx = 0, REnd = SchedReads.size(); RIdx != REnd; ++RIdx) {
-      dbgs() << RIdx << ": ";
-      SchedReads[RIdx].dump();
-      dbgs() << '\n';
-    }
-    RecVec RWDefs = Records.getAllDerivedDefinitions("SchedReadWrite");
-    for (Record *RWDef : RWDefs) {
-      if (!getSchedRWIdx(RWDef, RWDef->isSubClassOf("SchedRead"))) {
-        const std::string &Name = RWDef->getName();
-        if (Name != "NoWrite" && Name != "ReadDefault")
-          dbgs() << "Unused SchedReadWrite " << RWDef->getName() << '\n';
-      }
-    });
+  LLVM_DEBUG(
+      dbgs() << "\n+++ SCHED READS and WRITES (collectSchedRW) +++\n";
+      for (unsigned WIdx = 0, WEnd = SchedWrites.size(); WIdx != WEnd; ++WIdx) {
+        dbgs() << WIdx << ": ";
+        SchedWrites[WIdx].dump();
+        dbgs() << '\n';
+      } for (unsigned RIdx = 0, REnd = SchedReads.size(); RIdx != REnd;
+             ++RIdx) {
+        dbgs() << RIdx << ": ";
+        SchedReads[RIdx].dump();
+        dbgs() << '\n';
+      } RecVec RWDefs = Records.getAllDerivedDefinitions("SchedReadWrite");
+      for (Record *RWDef
+           : RWDefs) {
+        if (!getSchedRWIdx(RWDef, RWDef->isSubClassOf("SchedRead"))) {
+          StringRef Name = RWDef->getName();
+          if (Name != "NoWrite" && Name != "ReadDefault")
+            dbgs() << "Unused SchedReadWrite " << Name << '\n';
+        }
+      });
 }
 
 /// Compute a SchedWrite name from a sequence of writes.
@@ -346,16 +455,12 @@ std::string CodeGenSchedModels::genRWName(ArrayRef<unsigned> Seq, bool IsRead) {
   return Name;
 }
 
-unsigned CodeGenSchedModels::getSchedRWIdx(Record *Def, bool IsRead,
-                                           unsigned After) const {
+unsigned CodeGenSchedModels::getSchedRWIdx(const Record *Def,
+                                           bool IsRead) const {
   const std::vector<CodeGenSchedRW> &RWVec = IsRead ? SchedReads : SchedWrites;
-  assert(After < RWVec.size() && "start position out of bounds");
-  for (std::vector<CodeGenSchedRW>::const_iterator I = RWVec.begin() + After,
-         E = RWVec.end(); I != E; ++I) {
-    if (I->TheDef == Def)
-      return I - RWVec.begin();
-  }
-  return 0;
+  const auto I = find_if(
+      RWVec, [Def](const CodeGenSchedRW &RW) { return RW.TheDef == Def; });
+  return I == RWVec.end() ? 0 : std::distance(RWVec.begin(), I);
 }
 
 bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const {
@@ -372,10 +477,8 @@ bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const {
   return false;
 }
 
-namespace llvm {
-
-void splitSchedReadWrites(const RecVec &RWDefs,
-                          RecVec &WriteDefs, RecVec &ReadDefs) {
+static void splitSchedReadWrites(const RecVec &RWDefs,
+                                 RecVec &WriteDefs, RecVec &ReadDefs) {
   for (Record *RWDef : RWDefs) {
     if (RWDef->isSubClassOf("SchedWrite"))
       WriteDefs.push_back(RWDef);
@@ -386,16 +489,14 @@ void splitSchedReadWrites(const RecVec &RWDefs,
   }
 }
 
-} // end namespace llvm
-
 // Split the SchedReadWrites defs and call findRWs for each list.
 void CodeGenSchedModels::findRWs(const RecVec &RWDefs,
                                  IdxVec &Writes, IdxVec &Reads) const {
-    RecVec WriteDefs;
-    RecVec ReadDefs;
-    splitSchedReadWrites(RWDefs, WriteDefs, ReadDefs);
-    findRWs(WriteDefs, Writes, false);
-    findRWs(ReadDefs, Reads, true);
+  RecVec WriteDefs;
+  RecVec ReadDefs;
+  splitSchedReadWrites(RWDefs, WriteDefs, ReadDefs);
+  findRWs(WriteDefs, Writes, false);
+  findRWs(ReadDefs, Reads, true);
 }
 
 // Call getSchedRWIdx for all elements in a sequence of SchedRW defs.
@@ -432,11 +533,10 @@ void CodeGenSchedModels::expandRWSeqForProc(
 
   const CodeGenSchedRW &SchedWrite = getSchedRW(RWIdx, IsRead);
   Record *AliasDef = nullptr;
-  for (RecIter AI = SchedWrite.Aliases.begin(), AE = SchedWrite.Aliases.end();
-       AI != AE; ++AI) {
-    const CodeGenSchedRW &AliasRW = getSchedRW((*AI)->getValueAsDef("AliasRW"));
-    if ((*AI)->getValueInit("SchedModel")->isComplete()) {
-      Record *ModelDef = (*AI)->getValueAsDef("SchedModel");
+  for (const Record *Rec : SchedWrite.Aliases) {
+    const CodeGenSchedRW &AliasRW = getSchedRW(Rec->getValueAsDef("AliasRW"));
+    if (Rec->getValueInit("SchedModel")->isComplete()) {
+      Record *ModelDef = Rec->getValueAsDef("SchedModel");
       if (&getProcModel(ModelDef) != &ProcModel)
         continue;
     }
@@ -457,9 +557,9 @@ void CodeGenSchedModels::expandRWSeqForProc(
   }
   int Repeat =
     SchedWrite.TheDef ? SchedWrite.TheDef->getValueAsInt("Repeat") : 1;
-  for (int i = 0; i < Repeat; ++i) {
-    for (unsigned I : SchedWrite.Sequence) {
-      expandRWSeqForProc(I, RWSeq, IsRead, ProcModel);
+  for (int I = 0, E = Repeat; I < E; ++I) {
+    for (unsigned Idx : SchedWrite.Sequence) {
+      expandRWSeqForProc(Idx, RWSeq, IsRead, ProcModel);
     }
   }
 }
@@ -469,13 +569,11 @@ unsigned CodeGenSchedModels::findRWForSequence(ArrayRef<unsigned> Seq,
                                                bool IsRead) {
   std::vector<CodeGenSchedRW> &RWVec = IsRead ? SchedReads : SchedWrites;
 
-  for (std::vector<CodeGenSchedRW>::iterator I = RWVec.begin(), E = RWVec.end();
-       I != E; ++I) {
-    if (makeArrayRef(I->Sequence) == Seq)
-      return I - RWVec.begin();
-  }
+  auto I = find_if(RWVec, [Seq](CodeGenSchedRW &RW) {
+    return makeArrayRef(RW.Sequence) == Seq;
+  });
   // Index zero reserved for invalid RW.
-  return 0;
+  return I == RWVec.end() ? 0 : std::distance(RWVec.begin(), I);
 }
 
 /// Add this ReadWrite if it doesn't already exist.
@@ -489,12 +587,10 @@ unsigned CodeGenSchedModels::findOrInsertRW(ArrayRef<unsigned> Seq,
   if (Idx)
     return Idx;
 
-  unsigned RWIdx = IsRead ? SchedReads.size() : SchedWrites.size();
+  std::vector<CodeGenSchedRW> &RWVec = IsRead ? SchedReads : SchedWrites;
+  unsigned RWIdx = RWVec.size();
   CodeGenSchedRW SchedRW(RWIdx, IsRead, Seq, genRWName(Seq, IsRead));
-  if (IsRead)
-    SchedReads.push_back(SchedRW);
-  else
-    SchedWrites.push_back(SchedRW);
+  RWVec.push_back(SchedRW);
   return RWIdx;
 }
 
@@ -504,10 +600,9 @@ unsigned CodeGenSchedModels::findOrInsertRW(ArrayRef<unsigned> Seq,
 void CodeGenSchedModels::collectSchedClasses() {
 
   // NoItinerary is always the first class at Idx=0
-  SchedClasses.resize(1);
-  SchedClasses.back().Index = 0;
-  SchedClasses.back().Name = "NoInstrModel";
-  SchedClasses.back().ItinClassDef = Records.getDef("NoItinerary");
+  assert(SchedClasses.empty() && "Expected empty sched class");
+  SchedClasses.emplace_back(0, "NoInstrModel",
+                            Records.getDef("NoItinerary"));
   SchedClasses.back().ProcIndices.push_back(0);
 
   // Create a SchedClass for each unique combination of itinerary class and
@@ -519,32 +614,34 @@ void CodeGenSchedModels::collectSchedClasses() {
       findRWs(Inst->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads);
 
     // ProcIdx == 0 indicates the class applies to all processors.
-    IdxVec ProcIndices(1, 0);
-
-    unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, ProcIndices);
+    unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, /*ProcIndices*/{0});
     InstrClassMap[Inst->TheDef] = SCIdx;
   }
   // Create classes for InstRW defs.
   RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW");
-  std::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord());
-  DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n");
+  llvm::sort(InstRWDefs.begin(), InstRWDefs.end(), LessRecord());
+  LLVM_DEBUG(dbgs() << "\n+++ SCHED CLASSES (createInstRWClass) +++\n");
   for (Record *RWDef : InstRWDefs)
     createInstRWClass(RWDef);
 
   NumInstrSchedClasses = SchedClasses.size();
 
   bool EnableDump = false;
-  DEBUG(EnableDump = true);
+  LLVM_DEBUG(EnableDump = true);
   if (!EnableDump)
     return;
 
-  dbgs() << "\n+++ ITINERARIES and/or MACHINE MODELS (collectSchedClasses) +++\n";
+  LLVM_DEBUG(
+      dbgs()
+      << "\n+++ ITINERARIES and/or MACHINE MODELS (collectSchedClasses) +++\n");
   for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
     StringRef InstName = Inst->TheDef->getName();
-    unsigned SCIdx = InstrClassMap.lookup(Inst->TheDef);
+    unsigned SCIdx = getSchedClassIdx(*Inst);
     if (!SCIdx) {
-      if (!Inst->hasNoSchedulingInfo)
-        dbgs() << "No machine model for " << Inst->TheDef->getName() << '\n';
+      LLVM_DEBUG({
+        if (!Inst->hasNoSchedulingInfo)
+          dbgs() << "No machine model for " << Inst->TheDef->getName() << '\n';
+      });
       continue;
     }
     CodeGenSchedClass &SC = getSchedClass(SCIdx);
@@ -560,58 +657,51 @@ void CodeGenSchedModels::collectSchedClasses() {
     }
     if (!SC.Writes.empty()) {
       ProcIndices.push_back(0);
-      dbgs() << "SchedRW machine model for " << InstName;
-      for (IdxIter WI = SC.Writes.begin(), WE = SC.Writes.end(); WI != WE; ++WI)
-        dbgs() << " " << SchedWrites[*WI].Name;
-      for (IdxIter RI = SC.Reads.begin(), RE = SC.Reads.end(); RI != RE; ++RI)
-        dbgs() << " " << SchedReads[*RI].Name;
-      dbgs() << '\n';
+      LLVM_DEBUG({
+        dbgs() << "SchedRW machine model for " << InstName;
+        for (IdxIter WI = SC.Writes.begin(), WE = SC.Writes.end(); WI != WE;
+             ++WI)
+          dbgs() << " " << SchedWrites[*WI].Name;
+        for (IdxIter RI = SC.Reads.begin(), RE = SC.Reads.end(); RI != RE; ++RI)
+          dbgs() << " " << SchedReads[*RI].Name;
+        dbgs() << '\n';
+      });
     }
     const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
     for (Record *RWDef : RWDefs) {
       const CodeGenProcModel &ProcModel =
-        getProcModel(RWDef->getValueAsDef("SchedModel"));
+          getProcModel(RWDef->getValueAsDef("SchedModel"));
       ProcIndices.push_back(ProcModel.Index);
-      dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName;
+      LLVM_DEBUG(dbgs() << "InstRW on " << ProcModel.ModelName << " for "
+                        << InstName);
       IdxVec Writes;
       IdxVec Reads;
       findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"),
               Writes, Reads);
-      for (unsigned WIdx : Writes)
-        dbgs() << " " << SchedWrites[WIdx].Name;
-      for (unsigned RIdx : Reads)
-        dbgs() << " " << SchedReads[RIdx].Name;
-      dbgs() << '\n';
+      LLVM_DEBUG({
+        for (unsigned WIdx : Writes)
+          dbgs() << " " << SchedWrites[WIdx].Name;
+        for (unsigned RIdx : Reads)
+          dbgs() << " " << SchedReads[RIdx].Name;
+        dbgs() << '\n';
+      });
     }
     // If ProcIndices contains zero, the class applies to all processors.
-    if (!std::count(ProcIndices.begin(), ProcIndices.end(), 0)) {
-      for (const CodeGenProcModel &PM : ProcModels) {
-        if (!std::count(ProcIndices.begin(), ProcIndices.end(), PM.Index))
-          dbgs() << "No machine model for " << Inst->TheDef->getName()
-                 << " on processor " << PM.ModelName << '\n';
+    LLVM_DEBUG({
+      if (!std::count(ProcIndices.begin(), ProcIndices.end(), 0)) {
+        for (const CodeGenProcModel &PM : ProcModels) {
+          if (!std::count(ProcIndices.begin(), ProcIndices.end(), PM.Index))
+            dbgs() << "No machine model for " << Inst->TheDef->getName()
+                   << " on processor " << PM.ModelName << '\n';
+        }
       }
-    }
-  }
-}
-
-/// Find an SchedClass that has been inferred from a per-operand list of
-/// SchedWrites and SchedReads.
-unsigned CodeGenSchedModels::findSchedClassIdx(Record *ItinClassDef,
-                                               ArrayRef<unsigned> Writes,
-                                               ArrayRef<unsigned> Reads) const {
-  for (SchedClassIter I = schedClassBegin(), E = schedClassEnd(); I != E; ++I) {
-    if (I->ItinClassDef == ItinClassDef && makeArrayRef(I->Writes) == Writes &&
-        makeArrayRef(I->Reads) == Reads) {
-      return I - schedClassBegin();
-    }
+    });
   }
-  return 0;
 }
 
 // Get the SchedClass index for an instruction.
-unsigned CodeGenSchedModels::getSchedClassIdx(
-  const CodeGenInstruction &Inst) const {
-
+unsigned
+CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
   return InstrClassMap.lookup(Inst.TheDef);
 }
 
@@ -655,22 +745,27 @@ unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef,
                                            ArrayRef<unsigned> ProcIndices) {
   assert(!ProcIndices.empty() && "expect at least one ProcIdx");
 
-  unsigned Idx = findSchedClassIdx(ItinClassDef, OperWrites, OperReads);
+  auto IsKeyEqual = [=](const CodeGenSchedClass &SC) {
+                     return SC.isKeyEqual(ItinClassDef, OperWrites, OperReads);
+                   };
+
+  auto I = find_if(make_range(schedClassBegin(), schedClassEnd()), IsKeyEqual);
+  unsigned Idx = I == schedClassEnd() ? 0 : std::distance(schedClassBegin(), I);
   if (Idx || SchedClasses[0].isKeyEqual(ItinClassDef, OperWrites, OperReads)) {
     IdxVec PI;
     std::set_union(SchedClasses[Idx].ProcIndices.begin(),
                    SchedClasses[Idx].ProcIndices.end(),
                    ProcIndices.begin(), ProcIndices.end(),
                    std::back_inserter(PI));
-    SchedClasses[Idx].ProcIndices.swap(PI);
+    SchedClasses[Idx].ProcIndices = std::move(PI);
     return Idx;
   }
   Idx = SchedClasses.size();
-  SchedClasses.resize(Idx+1);
+  SchedClasses.emplace_back(Idx,
+                            createSchedClassName(ItinClassDef, OperWrites,
+                                                 OperReads),
+                            ItinClassDef);
   CodeGenSchedClass &SC = SchedClasses.back();
-  SC.Index = Idx;
-  SC.Name = createSchedClassName(ItinClassDef, OperWrites, OperReads);
-  SC.ItinClassDef = ItinClassDef;
   SC.Writes = OperWrites;
   SC.Reads = OperReads;
   SC.ProcIndices = ProcIndices;
@@ -685,106 +780,104 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
   // intersects with an existing class via a previous InstRWDef. Instrs that do
   // not intersect with an existing class refer back to their former class as
   // determined from ItinDef or SchedRW.
-  SmallVector<std::pair<unsigned, SmallVector<Record *, 8>>, 4> ClassInstrs;
+  SmallMapVector<unsigned, SmallVector<Record *, 8>, 4> ClassInstrs;
   // Sort Instrs into sets.
   const RecVec *InstDefs = Sets.expand(InstRWDef);
   if (InstDefs->empty())
     PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes");
 
-  for (Record *InstDef : make_range(InstDefs->begin(), InstDefs->end())) {
+  for (Record *InstDef : *InstDefs) {
     InstClassMapTy::const_iterator Pos = InstrClassMap.find(InstDef);
     if (Pos == InstrClassMap.end())
       PrintFatalError(InstDef->getLoc(), "No sched class for instruction.");
     unsigned SCIdx = Pos->second;
-    unsigned CIdx = 0, CEnd = ClassInstrs.size();
-    for (; CIdx != CEnd; ++CIdx) {
-      if (ClassInstrs[CIdx].first == SCIdx)
-        break;
-    }
-    if (CIdx == CEnd) {
-      ClassInstrs.resize(CEnd + 1);
-      ClassInstrs[CIdx].first = SCIdx;
-    }
-    ClassInstrs[CIdx].second.push_back(InstDef);
+    ClassInstrs[SCIdx].push_back(InstDef);
   }
   // For each set of Instrs, create a new class if necessary, and map or remap
   // the Instrs to it.
-  unsigned CIdx = 0, CEnd = ClassInstrs.size();
-  for (; CIdx != CEnd; ++CIdx) {
-    unsigned OldSCIdx = ClassInstrs[CIdx].first;
-    ArrayRef<Record*> InstDefs = ClassInstrs[CIdx].second;
+  for (auto &Entry : ClassInstrs) {
+    unsigned OldSCIdx = Entry.first;
+    ArrayRef<Record*> InstDefs = Entry.second;
     // If the all instrs in the current class are accounted for, then leave
     // them mapped to their old class.
     if (OldSCIdx) {
       const RecVec &RWDefs = SchedClasses[OldSCIdx].InstRWs;
       if (!RWDefs.empty()) {
         const RecVec *OrigInstDefs = Sets.expand(RWDefs[0]);
-        unsigned OrigNumInstrs = 0;
-        for (Record *OIDef : make_range(OrigInstDefs->begin(), OrigInstDefs->end())) {
-          if (InstrClassMap[OIDef] == OldSCIdx)
-            ++OrigNumInstrs;
-        }
+        unsigned OrigNumInstrs =
+          count_if(*OrigInstDefs, [&](Record *OIDef) {
+                     return InstrClassMap[OIDef] == OldSCIdx;
+                   });
         if (OrigNumInstrs == InstDefs.size()) {
           assert(SchedClasses[OldSCIdx].ProcIndices[0] == 0 &&
                  "expected a generic SchedClass");
-          DEBUG(dbgs() << "InstRW: Reuse SC " << OldSCIdx << ":"
-                << SchedClasses[OldSCIdx].Name << " on "
-                << InstRWDef->getValueAsDef("SchedModel")->getName() << "\n");
+          Record *RWModelDef = InstRWDef->getValueAsDef("SchedModel");
+          // Make sure we didn't already have an InstRW containing this
+          // instruction on this model.
+          for (Record *RWD : RWDefs) {
+            if (RWD->getValueAsDef("SchedModel") == RWModelDef &&
+                RWModelDef->getValueAsBit("FullInstRWOverlapCheck")) {
+              for (Record *Inst : InstDefs) {
+                PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " +
+                            Inst->getName() + " also matches " +
+                            RWD->getValue("Instrs")->getValue()->getAsString());
+              }
+            }
+          }
+          LLVM_DEBUG(dbgs() << "InstRW: Reuse SC " << OldSCIdx << ":"
+                            << SchedClasses[OldSCIdx].Name << " on "
+                            << RWModelDef->getName() << "\n");
           SchedClasses[OldSCIdx].InstRWs.push_back(InstRWDef);
           continue;
         }
       }
     }
     unsigned SCIdx = SchedClasses.size();
-    SchedClasses.resize(SCIdx+1);
+    SchedClasses.emplace_back(SCIdx, createSchedClassName(InstDefs), nullptr);
     CodeGenSchedClass &SC = SchedClasses.back();
-    SC.Index = SCIdx;
-    SC.Name = createSchedClassName(InstDefs);
-    DEBUG(dbgs() << "InstRW: New SC " << SCIdx << ":" << SC.Name << " on "
-          << InstRWDef->getValueAsDef("SchedModel")->getName() << "\n");
+    LLVM_DEBUG(dbgs() << "InstRW: New SC " << SCIdx << ":" << SC.Name << " on "
+                      << InstRWDef->getValueAsDef("SchedModel")->getName()
+                      << "\n");
 
     // Preserve ItinDef and Writes/Reads for processors without an InstRW entry.
     SC.ItinClassDef = SchedClasses[OldSCIdx].ItinClassDef;
     SC.Writes = SchedClasses[OldSCIdx].Writes;
     SC.Reads = SchedClasses[OldSCIdx].Reads;
     SC.ProcIndices.push_back(0);
-    // Map each Instr to this new class.
-    // Note that InstDefs may be a smaller list than InstRWDef's "Instrs".
-    Record *RWModelDef = InstRWDef->getValueAsDef("SchedModel");
-    SmallSet<unsigned, 4> RemappedClassIDs;
-    for (ArrayRef<Record*>::const_iterator
-           II = InstDefs.begin(), IE = InstDefs.end(); II != IE; ++II) {
-      unsigned OldSCIdx = InstrClassMap[*II];
-      if (OldSCIdx && RemappedClassIDs.insert(OldSCIdx).second) {
-        for (RecIter RI = SchedClasses[OldSCIdx].InstRWs.begin(),
-               RE = SchedClasses[OldSCIdx].InstRWs.end(); RI != RE; ++RI) {
-          if ((*RI)->getValueAsDef("SchedModel") == RWModelDef) {
-            PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " +
-                          (*II)->getName() + " also matches " +
-                          (*RI)->getValue("Instrs")->getValue()->getAsString());
+    // If we had an old class, copy its InstRWs to this new class.
+    if (OldSCIdx) {
+      Record *RWModelDef = InstRWDef->getValueAsDef("SchedModel");
+      for (Record *OldRWDef : SchedClasses[OldSCIdx].InstRWs) {
+        if (OldRWDef->getValueAsDef("SchedModel") == RWModelDef) {
+          for (Record *InstDef : InstDefs) {
+            PrintFatalError(OldRWDef->getLoc(), "Overlapping InstRW def " +
+                       InstDef->getName() + " also matches " +
+                       OldRWDef->getValue("Instrs")->getValue()->getAsString());
           }
-          assert(*RI != InstRWDef && "SchedClass has duplicate InstRW def");
-          SC.InstRWs.push_back(*RI);
         }
+        assert(OldRWDef != InstRWDef &&
+               "SchedClass has duplicate InstRW def");
+        SC.InstRWs.push_back(OldRWDef);
       }
-      InstrClassMap[*II] = SCIdx;
     }
+    // Map each Instr to this new class.
+    for (Record *InstDef : InstDefs)
+      InstrClassMap[InstDef] = SCIdx;
     SC.InstRWs.push_back(InstRWDef);
   }
 }
 
 // True if collectProcItins found anything.
 bool CodeGenSchedModels::hasItineraries() const {
-  for (const CodeGenProcModel &PM : make_range(procModelBegin(),procModelEnd())) {
+  for (const CodeGenProcModel &PM : make_range(procModelBegin(),procModelEnd()))
     if (PM.hasItineraries())
       return true;
-  }
   return false;
 }
 
 // Gather the processor itineraries.
 void CodeGenSchedModels::collectProcItins() {
-  DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n");
+  LLVM_DEBUG(dbgs() << "\n+++ PROBLEM ITINERARIES (collectProcItins) +++\n");
   for (CodeGenProcModel &ProcModel : ProcModels) {
     if (!ProcModel.hasItineraries())
       continue;
@@ -798,37 +891,39 @@ void CodeGenSchedModels::collectProcItins() {
     // Insert each itinerary data record in the correct position within
     // the processor model's ItinDefList.
     for (Record *ItinData : ItinRecords) {
-      Record *ItinDef = ItinData->getValueAsDef("TheClass");
+      const Record *ItinDef = ItinData->getValueAsDef("TheClass");
       bool FoundClass = false;
-      for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd();
-           SCI != SCE; ++SCI) {
+
+      for (const CodeGenSchedClass &SC :
+           make_range(schedClassBegin(), schedClassEnd())) {
         // Multiple SchedClasses may share an itinerary. Update all of them.
-        if (SCI->ItinClassDef == ItinDef) {
-          ProcModel.ItinDefList[SCI->Index] = ItinData;
+        if (SC.ItinClassDef == ItinDef) {
+          ProcModel.ItinDefList[SC.Index] = ItinData;
           FoundClass = true;
         }
       }
       if (!FoundClass) {
-        DEBUG(dbgs() << ProcModel.ItinsDef->getName()
-              << " missing class for itinerary " << ItinDef->getName() << '\n');
+        LLVM_DEBUG(dbgs() << ProcModel.ItinsDef->getName()
+                          << " missing class for itinerary "
+                          << ItinDef->getName() << '\n');
       }
     }
     // Check for missing itinerary entries.
     assert(!ProcModel.ItinDefList[0] && "NoItinerary class can't have rec");
-    DEBUG(
-      for (unsigned i = 1, N = ProcModel.ItinDefList.size(); i < N; ++i) {
-        if (!ProcModel.ItinDefList[i])
-          dbgs() << ProcModel.ItinsDef->getName()
-                 << " missing itinerary for class "
-                 << SchedClasses[i].Name << '\n';
-      });
+    LLVM_DEBUG(
+        for (unsigned i = 1, N = ProcModel.ItinDefList.size(); i < N; ++i) {
+          if (!ProcModel.ItinDefList[i])
+            dbgs() << ProcModel.ItinsDef->getName()
+                   << " missing itinerary for class " << SchedClasses[i].Name
+                   << '\n';
+        });
   }
 }
 
 // Gather the read/write types for each itinerary class.
 void CodeGenSchedModels::collectProcItinRW() {
   RecVec ItinRWDefs = Records.getAllDerivedDefinitions("ItinRW");
-  std::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord());
+  llvm::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord());
   for (Record *RWDef  : ItinRWDefs) {
     if (!RWDef->getValueInit("SchedModel")->isComplete())
       PrintFatalError(RWDef->getLoc(), "SchedModel is undefined");
@@ -854,8 +949,9 @@ void CodeGenSchedModels::collectProcUnsupportedFeatures() {
 /// Infer new classes from existing classes. In the process, this may create new
 /// SchedWrites from sequences of existing SchedWrites.
 void CodeGenSchedModels::inferSchedClasses() {
-  DEBUG(dbgs() << "\n+++ INFERRING SCHED CLASSES (inferSchedClasses) +++\n");
-  DEBUG(dbgs() << NumInstrSchedClasses << " instr sched classes.\n");
+  LLVM_DEBUG(
+      dbgs() << "\n+++ INFERRING SCHED CLASSES (inferSchedClasses) +++\n");
+  LLVM_DEBUG(dbgs() << NumInstrSchedClasses << " instr sched classes.\n");
 
   // Visit all existing classes and newly created classes.
   for (unsigned Idx = 0; Idx != SchedClasses.size(); ++Idx) {
@@ -881,20 +977,18 @@ void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef,
     const CodeGenProcModel &PM = ProcModels[PIdx];
     // For all ItinRW entries.
     bool HasMatch = false;
-    for (RecIter II = PM.ItinRWDefs.begin(), IE = PM.ItinRWDefs.end();
-         II != IE; ++II) {
-      RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses");
+    for (const Record *Rec : PM.ItinRWDefs) {
+      RecVec Matched = Rec->getValueAsListOfDefs("MatchedItinClasses");
       if (!std::count(Matched.begin(), Matched.end(), ItinClassDef))
         continue;
       if (HasMatch)
-        PrintFatalError((*II)->getLoc(), "Duplicate itinerary class "
+        PrintFatalError(Rec->getLoc(), "Duplicate itinerary class "
                       + ItinClassDef->getName()
                       + " in ItinResources for " + PM.ModelName);
       HasMatch = true;
       IdxVec Writes, Reads;
-      findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
-      IdxVec ProcIndices(1, PIdx);
-      inferFromRW(Writes, Reads, FromClassIdx, ProcIndices);
+      findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+      inferFromRW(Writes, Reads, FromClassIdx, PIdx);
     }
   }
 }
@@ -917,8 +1011,7 @@ void CodeGenSchedModels::inferFromInstRWs(unsigned SCIdx) {
     IdxVec Writes, Reads;
     findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
     unsigned PIdx = getProcModel(Rec->getValueAsDef("SchedModel")).Index;
-    IdxVec ProcIndices(1, PIdx);
-    inferFromRW(Writes, Reads, SCIdx, ProcIndices); // May mutate SchedClasses.
+    inferFromRW(Writes, Reads, SCIdx, PIdx); // May mutate SchedClasses.
   }
 }
 
@@ -1001,10 +1094,10 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
     const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(PC.RWIdx, PC.IsRead);
     assert(SchedRW.HasVariants && "PredCheck must refer to a SchedVariant");
     RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants");
-    for (RecIter VI = Variants.begin(), VE = Variants.end(); VI != VE; ++VI) {
-      if ((*VI)->getValueAsDef("Predicate") == PredDef)
-        return true;
-    }
+    if (any_of(Variants, [PredDef](const Record *R) {
+          return R->getValueAsDef("Predicate") == PredDef;
+        }))
+      return true;
   }
   return false;
 }
@@ -1022,12 +1115,10 @@ static bool hasAliasedVariants(const CodeGenSchedRW &RW,
     if (AliasRW.IsSequence) {
       IdxVec ExpandedRWs;
       SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
-      for (IdxIter SI = ExpandedRWs.begin(), SE = ExpandedRWs.end();
-           SI != SE; ++SI) {
-        if (hasAliasedVariants(SchedModels.getSchedRW(*SI, AliasRW.IsRead),
-                               SchedModels)) {
+      for (unsigned SI : ExpandedRWs) {
+        if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
+                               SchedModels))
           return true;
-        }
       }
     }
   }
@@ -1036,27 +1127,16 @@ static bool hasAliasedVariants(const CodeGenSchedRW &RW,
 
 static bool hasVariant(ArrayRef<PredTransition> Transitions,
                        CodeGenSchedModels &SchedModels) {
-  for (ArrayRef<PredTransition>::iterator
-         PTI = Transitions.begin(), PTE = Transitions.end();
-       PTI != PTE; ++PTI) {
-    for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
-           WSI = PTI->WriteSequences.begin(), WSE = PTI->WriteSequences.end();
-         WSI != WSE; ++WSI) {
-      for (SmallVectorImpl<unsigned>::const_iterator
-             WI = WSI->begin(), WE = WSI->end(); WI != WE; ++WI) {
-        if (hasAliasedVariants(SchedModels.getSchedWrite(*WI), SchedModels))
+  for (const PredTransition &PTI : Transitions) {
+    for (const SmallVectorImpl<unsigned> &WSI : PTI.WriteSequences)
+      for (unsigned WI : WSI)
+        if (hasAliasedVariants(SchedModels.getSchedWrite(WI), SchedModels))
           return true;
-      }
-    }
-    for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
-           RSI = PTI->ReadSequences.begin(), RSE = PTI->ReadSequences.end();
-         RSI != RSE; ++RSI) {
-      for (SmallVectorImpl<unsigned>::const_iterator
-             RI = RSI->begin(), RE = RSI->end(); RI != RE; ++RI) {
-        if (hasAliasedVariants(SchedModels.getSchedRead(*RI), SchedModels))
+
+    for (const SmallVectorImpl<unsigned> &RSI : PTI.ReadSequences)
+      for (unsigned RI : RSI)
+        if (hasAliasedVariants(SchedModels.getSchedRead(RI), SchedModels))
           return true;
-      }
-    }
   }
   return false;
 }
@@ -1080,7 +1160,7 @@ void PredTransitions::getIntersectingVariants(
     // Push each variant. Assign TransVecIdx later.
     const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
     for (Record *VarDef : VarDefs)
-      Variants.push_back(TransVariant(VarDef, SchedRW.Index, VarProcIdx, 0));
+      Variants.emplace_back(VarDef, SchedRW.Index, VarProcIdx, 0);
     if (VarProcIdx == 0)
       GenericRW = true;
   }
@@ -1100,12 +1180,10 @@ void PredTransitions::getIntersectingVariants(
     if (AliasRW.HasVariants) {
       const RecVec VarDefs = AliasRW.TheDef->getValueAsListOfDefs("Variants");
       for (Record *VD : VarDefs)
-        Variants.push_back(TransVariant(VD, AliasRW.Index, AliasProcIdx, 0));
-    }
-    if (AliasRW.IsSequence) {
-      Variants.push_back(
-        TransVariant(AliasRW.TheDef, SchedRW.Index, AliasProcIdx, 0));
+        Variants.emplace_back(VD, AliasRW.Index, AliasProcIdx, 0);
     }
+    if (AliasRW.IsSequence)
+      Variants.emplace_back(AliasRW.TheDef, SchedRW.Index, AliasProcIdx, 0);
     if (AliasProcIdx == 0)
       GenericRW = true;
   }
@@ -1164,7 +1242,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
   IdxVec SelectedRWs;
   if (VInfo.VarOrSeqDef->isSubClassOf("SchedVar")) {
     Record *PredDef = VInfo.VarOrSeqDef->getValueAsDef("Predicate");
-    Trans.PredTerm.push_back(PredCheck(IsRead, VInfo.RWIdx,PredDef));
+    Trans.PredTerm.emplace_back(IsRead, VInfo.RWIdx,PredDef);
     RecVec SelectedDefs = VInfo.VarOrSeqDef->getValueAsListOfDefs("Selected");
     SchedModels.findRWs(SelectedDefs, SelectedRWs, IsRead);
   }
@@ -1181,11 +1259,8 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
   if (SchedRW.IsVariadic) {
     unsigned OperIdx = RWSequences.size()-1;
     // Make N-1 copies of this transition's last sequence.
-    for (unsigned i = 1, e = SelectedRWs.size(); i != e; ++i) {
-      // Create a temporary copy the vector could reallocate.
-      RWSequences.reserve(RWSequences.size() + 1);
-      RWSequences.push_back(RWSequences[OperIdx]);
-    }
+    RWSequences.insert(RWSequences.end(), SelectedRWs.size() - 1,
+                       RWSequences[OperIdx]);
     // Push each of the N elements of the SelectedRWs onto a copy of the last
     // sequence (split the current operand into N operands).
     // Note that write sequences should be expanded within this loop--the entire
@@ -1267,7 +1342,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
   // Build up a set of partial results starting at the back of
   // PredTransitions. Remember the first new transition.
   unsigned StartIdx = TransVec.size();
-  TransVec.resize(TransVec.size() + 1);
+  TransVec.emplace_back();
   TransVec.back().PredTerm = Trans.PredTerm;
   TransVec.back().ProcIndices = Trans.ProcIndices;
 
@@ -1278,7 +1353,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
     // Push a new (empty) write sequence onto all partial Transitions.
     for (std::vector<PredTransition>::iterator I =
            TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
-      I->WriteSequences.resize(I->WriteSequences.size() + 1);
+      I->WriteSequences.emplace_back();
     }
     substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
   }
@@ -1289,7 +1364,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
     // Push a new (empty) read sequence onto all partial Transitions.
     for (std::vector<PredTransition>::iterator I =
            TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
-      I->ReadSequences.resize(I->ReadSequences.size() + 1);
+      I->ReadSequences.emplace_back();
     }
     substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
   }
@@ -1304,37 +1379,30 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
   for (ArrayRef<PredTransition>::iterator
          I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
     IdxVec OperWritesVariant;
-    for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
-           WSI = I->WriteSequences.begin(), WSE = I->WriteSequences.end();
-         WSI != WSE; ++WSI) {
-      // Create a new write representing the expanded sequence.
-      OperWritesVariant.push_back(
-        SchedModels.findOrInsertRW(*WSI, /*IsRead=*/false));
-    }
+    transform(I->WriteSequences, std::back_inserter(OperWritesVariant),
+              [&SchedModels](ArrayRef<unsigned> WS) {
+                return SchedModels.findOrInsertRW(WS, /*IsRead=*/false);
+              });
     IdxVec OperReadsVariant;
-    for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
-           RSI = I->ReadSequences.begin(), RSE = I->ReadSequences.end();
-         RSI != RSE; ++RSI) {
-      // Create a new read representing the expanded sequence.
-      OperReadsVariant.push_back(
-        SchedModels.findOrInsertRW(*RSI, /*IsRead=*/true));
-    }
-    IdxVec ProcIndices(I->ProcIndices.begin(), I->ProcIndices.end());
+    transform(I->ReadSequences, std::back_inserter(OperReadsVariant),
+              [&SchedModels](ArrayRef<unsigned> RS) {
+                return SchedModels.findOrInsertRW(RS, /*IsRead=*/true);
+              });
     CodeGenSchedTransition SCTrans;
     SCTrans.ToClassIdx =
       SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant,
-                                OperReadsVariant, ProcIndices);
-    SCTrans.ProcIndices = ProcIndices;
+                                OperReadsVariant, I->ProcIndices);
+    SCTrans.ProcIndices.assign(I->ProcIndices.begin(), I->ProcIndices.end());
     // The final PredTerm is unique set of predicates guarding the transition.
     RecVec Preds;
-    for (SmallVectorImpl<PredCheck>::const_iterator
-           PI = I->PredTerm.begin(), PE = I->PredTerm.end(); PI != PE; ++PI) {
-      Preds.push_back(PI->Predicate);
-    }
-    RecIter PredsEnd = std::unique(Preds.begin(), Preds.end());
-    Preds.resize(PredsEnd - Preds.begin());
-    SCTrans.PredTerm = Preds;
-    SchedModels.getSchedClass(FromClassIdx).Transitions.push_back(SCTrans);
+    transform(I->PredTerm, std::back_inserter(Preds),
+              [](const PredCheck &P) {
+                return P.Predicate;
+              });
+    Preds.erase(std::unique(Preds.begin(), Preds.end()), Preds.end());
+    SCTrans.PredTerm = std::move(Preds);
+    SchedModels.getSchedClass(FromClassIdx)
+        .Transitions.push_back(std::move(SCTrans));
   }
 }
 
@@ -1345,48 +1413,42 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
                                      ArrayRef<unsigned> OperReads,
                                      unsigned FromClassIdx,
                                      ArrayRef<unsigned> ProcIndices) {
-  DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") ");
+  LLVM_DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices);
+             dbgs() << ") ");
 
   // Create a seed transition with an empty PredTerm and the expanded sequences
   // of SchedWrites for the current SchedClass.
   std::vector<PredTransition> LastTransitions;
-  LastTransitions.resize(1);
+  LastTransitions.emplace_back();
   LastTransitions.back().ProcIndices.append(ProcIndices.begin(),
                                             ProcIndices.end());
 
   for (unsigned WriteIdx : OperWrites) {
     IdxVec WriteSeq;
     expandRWSequence(WriteIdx, WriteSeq, /*IsRead=*/false);
-    unsigned Idx = LastTransitions[0].WriteSequences.size();
-    LastTransitions[0].WriteSequences.resize(Idx + 1);
-    SmallVectorImpl<unsigned> &Seq = LastTransitions[0].WriteSequences[Idx];
-    for (IdxIter WI = WriteSeq.begin(), WE = WriteSeq.end(); WI != WE; ++WI)
-      Seq.push_back(*WI);
-    DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") ");
-  }
-  DEBUG(dbgs() << " Reads: ");
+    LastTransitions[0].WriteSequences.emplace_back();
+    SmallVectorImpl<unsigned> &Seq = LastTransitions[0].WriteSequences.back();
+    Seq.append(WriteSeq.begin(), WriteSeq.end());
+    LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") ");
+  }
+  LLVM_DEBUG(dbgs() << " Reads: ");
   for (unsigned ReadIdx : OperReads) {
     IdxVec ReadSeq;
     expandRWSequence(ReadIdx, ReadSeq, /*IsRead=*/true);
-    unsigned Idx = LastTransitions[0].ReadSequences.size();
-    LastTransitions[0].ReadSequences.resize(Idx + 1);
-    SmallVectorImpl<unsigned> &Seq = LastTransitions[0].ReadSequences[Idx];
-    for (IdxIter RI = ReadSeq.begin(), RE = ReadSeq.end(); RI != RE; ++RI)
-      Seq.push_back(*RI);
-    DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") ");
+    LastTransitions[0].ReadSequences.emplace_back();
+    SmallVectorImpl<unsigned> &Seq = LastTransitions[0].ReadSequences.back();
+    Seq.append(ReadSeq.begin(), ReadSeq.end());
+    LLVM_DEBUG(dbgs() << "("; dumpIdxVec(Seq); dbgs() << ") ");
   }
-  DEBUG(dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << '\n');
 
   // Collect all PredTransitions for individual operands.
   // Iterate until no variant writes remain.
   while (hasVariant(LastTransitions, *this)) {
     PredTransitions Transitions(*this);
-    for (std::vector<PredTransition>::const_iterator
-           I = LastTransitions.begin(), E = LastTransitions.end();
-         I != E; ++I) {
-      Transitions.substituteVariants(*I);
-    }
-    DEBUG(Transitions.dump());
+    for (const PredTransition &Trans : LastTransitions)
+      Transitions.substituteVariants(Trans);
+    LLVM_DEBUG(Transitions.dump());
     LastTransitions.swap(Transitions.TransVec);
   }
   // If the first transition has no variants, nothing to do.
@@ -1447,6 +1509,47 @@ void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) {
   }
 }
 
+// Collect every RegisterFile definition and attach it to its processor model.
+void CodeGenSchedModels::collectRegisterFiles() {
+  RecVec RegisterFileDefs = Records.getAllDerivedDefinitions("RegisterFile");
+
+  // Each processor model owns its register files in PM.RegisterFiles.
+  for (Record *RF : RegisterFileDefs) {
+    // Construct the CodeGenRegisterFile in place (no temporary) inside the
+    // scheduling model named by the record's "SchedModel" field.
+    CodeGenProcModel &PM = getProcModel(RF->getValueAsDef("SchedModel"));
+    PM.RegisterFiles.emplace_back(RF->getName(), RF);
+    CodeGenRegisterFile &CGRF = PM.RegisterFiles.back();
+
+    // Record the number of physical registers and the per-register-class cost;
+    // classes with no matching RegCosts entry get a cost of 1.
+    CGRF.NumPhysRegs = RF->getValueAsInt("NumPhysRegs");
+    RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses");
+    std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts");
+    for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) {
+      int Cost = RegisterCosts.size() > I ? RegisterCosts[I] : 1;
+      CGRF.Costs.emplace_back(RegisterClasses[I], Cost);
+    }
+  }
+}
+
+// Collect all the performance counter definitions available in this target.
+void CodeGenSchedModels::collectPfmCounters() {
+  for (Record *Def : Records.getAllDerivedDefinitions("PfmIssueCounter")) {
+    CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
+    PM.PfmIssueCounterDefs.emplace_back(Def);
+  }
+  for (Record *Def : Records.getAllDerivedDefinitions("PfmCycleCounter")) {
+    CodeGenProcModel &PM = getProcModel(Def->getValueAsDef("SchedModel"));
+    if (PM.PfmCycleCounterDef) {
+      PrintFatalError(Def->getLoc(),
+                      "multiple cycle counters for " +
+                          Def->getValueAsDef("SchedModel")->getName());
+    }
+    PM.PfmCycleCounterDef = Def;
+  }
+}
+
 // Collect and sort WriteRes, ReadAdvance, and ProcResources.
 void CodeGenSchedModels::collectProcResources() {
   ProcResourceDefs = Records.getAllDerivedDefinitions("ProcResourceUnits");
@@ -1455,26 +1558,24 @@ void CodeGenSchedModels::collectProcResources() {
   // Add any subtarget-specific SchedReadWrites that are directly associated
   // with processor resources. Refer to the parent SchedClass's ProcIndices to
   // determine which processors they apply to.
-  for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd();
-       SCI != SCE; ++SCI) {
-    if (SCI->ItinClassDef)
-      collectItinProcResources(SCI->ItinClassDef);
-    else {
-      // This class may have a default ReadWrite list which can be overriden by
-      // InstRW definitions.
-      if (!SCI->InstRWs.empty()) {
-        for (RecIter RWI = SCI->InstRWs.begin(), RWE = SCI->InstRWs.end();
-             RWI != RWE; ++RWI) {
-          Record *RWModelDef = (*RWI)->getValueAsDef("SchedModel");
-          IdxVec ProcIndices(1, getProcModel(RWModelDef).Index);
-          IdxVec Writes, Reads;
-          findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"),
-                  Writes, Reads);
-          collectRWResources(Writes, Reads, ProcIndices);
-        }
-      }
-      collectRWResources(SCI->Writes, SCI->Reads, SCI->ProcIndices);
+  for (const CodeGenSchedClass &SC :
+       make_range(schedClassBegin(), schedClassEnd())) {
+    if (SC.ItinClassDef) {
+      collectItinProcResources(SC.ItinClassDef);
+      continue;
+    }
+
+    // This class may have a default ReadWrite list which can be overridden by
+    // InstRW definitions.
+    for (Record *RW : SC.InstRWs) {
+      Record *RWModelDef = RW->getValueAsDef("SchedModel");
+      unsigned PIdx = getProcModel(RWModelDef).Index;
+      IdxVec Writes, Reads;
+      findRWs(RW->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+      collectRWResources(Writes, Reads, PIdx);
     }
+
+    collectRWResources(SC.Writes, SC.Reads, SC.ProcIndices);
   }
   // Add resources separately defined by each subtarget.
   RecVec WRDefs = Records.getAllDerivedDefinitions("WriteRes");
@@ -1509,38 +1610,45 @@ void CodeGenSchedModels::collectProcResources() {
     if (!is_contained(PM.ProcResourceDefs, PRG))
       PM.ProcResourceDefs.push_back(PRG);
   }
+  // Add ProcResourceUnits unconditionally.
+  for (Record *PRU : Records.getAllDerivedDefinitions("ProcResourceUnits")) {
+    if (!PRU->getValueInit("SchedModel")->isComplete())
+      continue;
+    CodeGenProcModel &PM = getProcModel(PRU->getValueAsDef("SchedModel"));
+    if (!is_contained(PM.ProcResourceDefs, PRU))
+      PM.ProcResourceDefs.push_back(PRU);
+  }
   // Finalize each ProcModel by sorting the record arrays.
   for (CodeGenProcModel &PM : ProcModels) {
-    std::sort(PM.WriteResDefs.begin(), PM.WriteResDefs.end(),
-              LessRecord());
-    std::sort(PM.ReadAdvanceDefs.begin(), PM.ReadAdvanceDefs.end(),
-              LessRecord());
-    std::sort(PM.ProcResourceDefs.begin(), PM.ProcResourceDefs.end(),
-              LessRecord());
-    DEBUG(
-      PM.dump();
-      dbgs() << "WriteResDefs: ";
-      for (RecIter RI = PM.WriteResDefs.begin(),
-             RE = PM.WriteResDefs.end(); RI != RE; ++RI) {
-        if ((*RI)->isSubClassOf("WriteRes"))
-          dbgs() << (*RI)->getValueAsDef("WriteType")->getName() << " ";
-        else
-          dbgs() << (*RI)->getName() << " ";
-      }
-      dbgs() << "\nReadAdvanceDefs: ";
-      for (RecIter RI = PM.ReadAdvanceDefs.begin(),
-             RE = PM.ReadAdvanceDefs.end(); RI != RE; ++RI) {
-        if ((*RI)->isSubClassOf("ReadAdvance"))
-          dbgs() << (*RI)->getValueAsDef("ReadType")->getName() << " ";
-        else
-          dbgs() << (*RI)->getName() << " ";
-      }
-      dbgs() << "\nProcResourceDefs: ";
-      for (RecIter RI = PM.ProcResourceDefs.begin(),
-             RE = PM.ProcResourceDefs.end(); RI != RE; ++RI) {
-        dbgs() << (*RI)->getName() << " ";
-      }
-      dbgs() << '\n');
+    llvm::sort(PM.WriteResDefs.begin(), PM.WriteResDefs.end(),
+               LessRecord());
+    llvm::sort(PM.ReadAdvanceDefs.begin(), PM.ReadAdvanceDefs.end(),
+               LessRecord());
+    llvm::sort(PM.ProcResourceDefs.begin(), PM.ProcResourceDefs.end(),
+               LessRecord());
+    LLVM_DEBUG(
+        PM.dump();
+        dbgs() << "WriteResDefs: "; for (RecIter RI = PM.WriteResDefs.begin(),
+                                         RE = PM.WriteResDefs.end();
+                                         RI != RE; ++RI) {
+          if ((*RI)->isSubClassOf("WriteRes"))
+            dbgs() << (*RI)->getValueAsDef("WriteType")->getName() << " ";
+          else
+            dbgs() << (*RI)->getName() << " ";
+        } dbgs() << "\nReadAdvanceDefs: ";
+        for (RecIter RI = PM.ReadAdvanceDefs.begin(),
+             RE = PM.ReadAdvanceDefs.end();
+             RI != RE; ++RI) {
+          if ((*RI)->isSubClassOf("ReadAdvance"))
+            dbgs() << (*RI)->getValueAsDef("ReadType")->getName() << " ";
+          else
+            dbgs() << (*RI)->getName() << " ";
+        } dbgs()
+        << "\nProcResourceDefs: ";
+        for (RecIter RI = PM.ProcResourceDefs.begin(),
+             RE = PM.ProcResourceDefs.end();
+             RI != RE; ++RI) { dbgs() << (*RI)->getName() << " "; } dbgs()
+        << '\n');
     verifyProcResourceGroups(PM);
   }
 
@@ -1552,6 +1660,7 @@ void CodeGenSchedModels::checkCompleteness() {
   bool Complete = true;
   bool HadCompleteModel = false;
   for (const CodeGenProcModel &ProcModel : procModels()) {
+    const bool HasItineraries = ProcModel.hasItineraries();
     if (!ProcModel.ModelDef->getValueAsBit("CompleteModel"))
       continue;
     for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
@@ -1572,7 +1681,7 @@ void CodeGenSchedModels::checkCompleteness() {
       const CodeGenSchedClass &SC = getSchedClass(SCIdx);
       if (!SC.Writes.empty())
         continue;
-      if (SC.ItinClassDef != nullptr &&
+      if (HasItineraries && SC.ItinClassDef != nullptr &&
           SC.ItinClassDef->getName() != "NoItinerary")
         continue;
 
@@ -1619,8 +1728,7 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) {
       HasMatch = true;
       IdxVec Writes, Reads;
       findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
-      IdxVec ProcIndices(1, PIdx);
-      collectRWResources(Writes, Reads, ProcIndices);
+      collectRWResources(Writes, Reads, PIdx);
     }
   }
 }
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index 46e22cd12810..07c11596adee 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -26,6 +26,7 @@ namespace llvm {
 class CodeGenTarget;
 class CodeGenSchedModels;
 class CodeGenInstruction;
+class CodeGenRegisterClass;
 
 using RecVec = std::vector<Record*>;
 using RecIter = std::vector<Record*>::const_iterator;
@@ -33,9 +34,6 @@ using RecIter = std::vector<Record*>::const_iterator;
 using IdxVec = std::vector<unsigned>;
 using IdxIter = std::vector<unsigned>::const_iterator;
 
-void splitSchedReadWrites(const RecVec &RWDefs,
-                          RecVec &WriteDefs, RecVec &ReadDefs);
-
 /// We have two kinds of SchedReadWrites. Explicitly defined and inferred
 /// sequences.  TheDef is nonnull for explicit SchedWrites, but Sequence may or
 /// may not be empty. TheDef is null for inferred sequences, and Sequence must
@@ -142,9 +140,11 @@ struct CodeGenSchedClass {
   // off to join another inferred class.
   RecVec InstRWs;
 
-  CodeGenSchedClass(): Index(0), ItinClassDef(nullptr) {}
+  CodeGenSchedClass(unsigned Index, std::string Name, Record *ItinClassDef)
+    : Index(Index), Name(std::move(Name)), ItinClassDef(ItinClassDef) {}
 
-  bool isKeyEqual(Record *IC, ArrayRef<unsigned> W, ArrayRef<unsigned> R) {
+  bool isKeyEqual(Record *IC, ArrayRef<unsigned> W,
+                  ArrayRef<unsigned> R) const {
     return ItinClassDef == IC && makeArrayRef(Writes) == W &&
            makeArrayRef(Reads) == R;
   }
@@ -158,6 +158,38 @@ struct CodeGenSchedClass {
 #endif
 };
 
+/// Represent the cost of allocating a register of register class RCDef.
+///
+/// The cost of allocating a register is equivalent to the number of physical
+/// registers used by the register renamer. Register costs are defined at
+/// register class granularity.
+struct CodeGenRegisterCost {
+  Record *RCDef;
+  unsigned Cost;
+  CodeGenRegisterCost(Record *RC, unsigned RegisterCost)
+      : RCDef(RC), Cost(RegisterCost) {}
+  CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
+  CodeGenRegisterCost &operator=(const CodeGenRegisterCost &) = delete;
+};
+
+/// A processor register file.
+///
+/// This class describes a processor register file. Register file information is
+/// currently consumed by external tools like llvm-mca to predict dispatch
+/// stalls due to register pressure.
+struct CodeGenRegisterFile {
+  std::string Name;
+  Record *RegisterFileDef;
+
+  unsigned NumPhysRegs;
+  std::vector<CodeGenRegisterCost> Costs;
+
+  CodeGenRegisterFile(StringRef name, Record *def)
+      : Name(name), RegisterFileDef(def), NumPhysRegs(0) {}
+
+  bool hasDefaultCosts() const { return Costs.empty(); }
+};
+
 // Processor model.
 //
 // ModelName is a unique name used to name an instantiation of MCSchedModel.
@@ -199,11 +231,21 @@ struct CodeGenProcModel {
 
   // Per-operand machine model resources associated with this processor.
   RecVec ProcResourceDefs;
-  RecVec ProcResGroupDefs;
 
-  CodeGenProcModel(unsigned Idx, const std::string &Name, Record *MDef,
+  // List of Register Files.
+  std::vector<CodeGenRegisterFile> RegisterFiles;
+
+  // Optional Retire Control Unit definition.
+  Record *RetireControlUnit;
+
+  // List of PfmCounters.
+  RecVec PfmIssueCounterDefs;
+  Record *PfmCycleCounterDef = nullptr;
+
+  CodeGenProcModel(unsigned Idx, std::string Name, Record *MDef,
                    Record *IDef) :
-    Index(Idx), ModelName(Name), ModelDef(MDef), ItinsDef(IDef) {}
+    Index(Idx), ModelName(std::move(Name)), ModelDef(MDef), ItinsDef(IDef),
+    RetireControlUnit(nullptr) {}
 
   bool hasItineraries() const {
     return !ItinsDef->getValueAsListOfDefs("IID").empty();
@@ -213,6 +255,12 @@ struct CodeGenProcModel {
     return !WriteResDefs.empty() || !ItinRWDefs.empty();
   }
 
+  bool hasExtraProcessorInfo() const {
+    return RetireControlUnit || !RegisterFiles.empty() ||
+        !PfmIssueCounterDefs.empty() ||
+        PfmCycleCounterDef != nullptr;
+  }
+
   unsigned getProcResourceIdx(Record *PRDef) const;
 
   bool isUnsupported(const CodeGenInstruction &Inst) const;
@@ -336,11 +384,11 @@ public:
     return const_cast<CodeGenSchedRW&>(
       IsRead ? getSchedRead(Idx) : getSchedWrite(Idx));
   }
-  const CodeGenSchedRW &getSchedRW(Record*Def) const {
+  const CodeGenSchedRW &getSchedRW(Record *Def) const {
     return const_cast<CodeGenSchedModels&>(*this).getSchedRW(Def);
   }
 
-  unsigned getSchedRWIdx(Record *Def, bool IsRead, unsigned After = 0) const;
+  unsigned getSchedRWIdx(const Record *Def, bool IsRead) const;
 
   // Return true if the given write record is referenced by a ReadAdvance.
   bool hasReadOfWrite(Record *WriteDef) const;
@@ -379,9 +427,6 @@ public:
 
   unsigned findOrInsertRW(ArrayRef<unsigned> Seq, bool IsRead);
 
-  unsigned findSchedClassIdx(Record *ItinClassDef, ArrayRef<unsigned> Writes,
-                             ArrayRef<unsigned> Reads) const;
-
   Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM,
                            ArrayRef<SMLoc> Loc) const;
 
@@ -398,6 +443,14 @@ private:
 
   void collectSchedClasses();
 
+  void collectRetireControlUnits();
+
+  void collectRegisterFiles();
+
+  void collectPfmCounters();
+
+  void collectOptionalProcessorInfo();
+
   std::string createSchedClassName(Record *ItinClassDef,
                                    ArrayRef<unsigned> OperWrites,
                                    ArrayRef<unsigned> OperReads);
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 168bd690831f..cb73ca83c9bb 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -174,6 +174,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::iPTR:     return "MVT::iPTR";
   case MVT::iPTRAny:  return "MVT::iPTRAny";
   case MVT::Untyped:  return "MVT::Untyped";
+  case MVT::ExceptRef: return "MVT::ExceptRef";
   default: llvm_unreachable("ILLEGAL VALUE TYPE!");
   }
 }
@@ -224,6 +225,9 @@ Record *CodeGenTarget::getInstructionSet() const {
   return TargetRec->getValueAsDef("InstructionSet");
 }
 
+bool CodeGenTarget::getAllowRegisterRenaming() const {
+  return TargetRec->getValueAsInt("AllowRegisterRenaming");
+}
 
 /// getAsmParser - Return the AssemblyParser definition for this target.
 ///
@@ -274,7 +278,7 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
 
 void CodeGenTarget::ReadRegAltNameIndices() const {
   RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex");
-  std::sort(RegAltNameIndices.begin(), RegAltNameIndices.end(), LessRecord());
+  llvm::sort(RegAltNameIndices.begin(), RegAltNameIndices.end(), LessRecord());
 }
 
 /// getRegisterByName - If there is a register with the specific AsmName,
@@ -299,7 +303,7 @@ std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
   }
 
   // Remove duplicates.
-  std::sort(Result.begin(), Result.end());
+  llvm::sort(Result.begin(), Result.end());
   Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
   return Result;
 }
@@ -310,7 +314,7 @@ void CodeGenTarget::ReadLegalValueTypes() const {
     LegalValueTypes.insert(LegalValueTypes.end(), RC.VTs.begin(), RC.VTs.end());
 
   // Remove duplicates.
-  std::sort(LegalValueTypes.begin(), LegalValueTypes.end());
+  llvm::sort(LegalValueTypes.begin(), LegalValueTypes.end());
   LegalValueTypes.erase(std::unique(LegalValueTypes.begin(),
                                     LegalValueTypes.end()),
                         LegalValueTypes.end());
@@ -345,13 +349,18 @@ GetInstByName(const char *Name,
   return I->second.get();
 }
 
-/// \brief Return all of the instructions defined by the target, ordered by
+static const char *const FixedInstrs[] = {
+#define HANDLE_TARGET_OPCODE(OPC) #OPC,
+#include "llvm/Support/TargetOpcodes.def"
+    nullptr};
+
+unsigned CodeGenTarget::getNumFixedInstructions() {
+  return array_lengthof(FixedInstrs) - 1;
+}
+
+/// Return all of the instructions defined by the target, ordered by
 /// their enum value.
 void CodeGenTarget::ComputeInstrsByEnum() const {
-  static const char *const FixedInstrs[] = {
-#define HANDLE_TARGET_OPCODE(OPC) #OPC,
-#include "llvm/CodeGen/TargetOpcodes.def"
-      nullptr};
   const auto &Insts = getInstructions();
   for (const char *const *p = FixedInstrs; *p; ++p) {
     const CodeGenInstruction *Instr = GetInstByName(*p, Insts, Records);
@@ -360,21 +369,29 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
     InstrsByEnum.push_back(Instr);
   }
   unsigned EndOfPredefines = InstrsByEnum.size();
+  assert(EndOfPredefines == getNumFixedInstructions() &&
+         "Missing generic opcode");
 
   for (const auto &I : Insts) {
     const CodeGenInstruction *CGI = I.second.get();
-    if (CGI->Namespace != "TargetOpcode")
+    if (CGI->Namespace != "TargetOpcode") {
       InstrsByEnum.push_back(CGI);
+      if (CGI->TheDef->getValueAsBit("isPseudo"))
+        ++NumPseudoInstructions;
+    }
   }
 
   assert(InstrsByEnum.size() == Insts.size() && "Missing predefined instr");
 
   // All of the instructions are now in random order based on the map iteration.
-  // Sort them by name.
-  std::sort(InstrsByEnum.begin() + EndOfPredefines, InstrsByEnum.end(),
-            [](const CodeGenInstruction *Rec1, const CodeGenInstruction *Rec2) {
-    return Rec1->TheDef->getName() < Rec2->TheDef->getName();
-  });
+  llvm::sort(
+      InstrsByEnum.begin() + EndOfPredefines, InstrsByEnum.end(),
+      [](const CodeGenInstruction *Rec1, const CodeGenInstruction *Rec2) {
+        const auto &D1 = *Rec1->TheDef;
+        const auto &D2 = *Rec2->TheDef;
+        return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
+               std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
+      });
 }
 
 
@@ -496,11 +513,11 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
     if (isTarget == TargetOnly)
       Intrinsics.push_back(CodeGenIntrinsic(Defs[I]));
   }
-  std::sort(Intrinsics.begin(), Intrinsics.end(),
-            [](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
-              return std::tie(LHS.TargetPrefix, LHS.Name) <
-                     std::tie(RHS.TargetPrefix, RHS.Name);
-            });
+  llvm::sort(Intrinsics.begin(), Intrinsics.end(),
+             [](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
+               return std::tie(LHS.TargetPrefix, LHS.Name) <
+                      std::tie(RHS.TargetPrefix, RHS.Name);
+             });
   Targets.push_back({"", 0, 0});
   for (size_t I = 0, E = Intrinsics.size(); I < E; ++I)
     if (Intrinsics[I].TargetPrefix != Targets.back().Name) {
@@ -604,8 +621,12 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
     MVT::SimpleValueType VT;
     if (TyEl->isSubClassOf("LLVMMatchType")) {
       unsigned MatchTy = TyEl->getValueAsInt("Number");
-      assert(MatchTy < OverloadedVTs.size() &&
-             "Invalid matching number!");
+      if (MatchTy >= OverloadedVTs.size()) {
+        PrintError(R->getLoc(),
+                   "Parameter #" + Twine(i) + " has out of bounds matching "
+                   "number " + Twine(MatchTy));
+        PrintFatalError(Twine("ParamTypes is ") + TypeList->getAsString());
+      }
       VT = OverloadedVTs[MatchTy];
       // It only makes sense to use the extended and truncated vector element
       // variants with iAny types; otherwise, if the intrinsic is not
@@ -688,6 +709,6 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   Properties = parseSDPatternOperatorProperties(R);
 
   // Sort the argument attributes for later benefit.
-  std::sort(ArgumentAttributes.begin(), ArgumentAttributes.end());
+  llvm::sort(ArgumentAttributes.begin(), ArgumentAttributes.end());
 }
 
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 7280d707fba6..d2833d5b6a92 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -62,6 +62,7 @@ class CodeGenTarget {
   mutable std::unique_ptr<CodeGenSchedModels> SchedModels;
 
   mutable std::vector<const CodeGenInstruction*> InstrsByEnum;
+  mutable unsigned NumPseudoInstructions = 0;
 public:
   CodeGenTarget(RecordKeeper &Records);
   ~CodeGenTarget();
@@ -77,6 +78,11 @@ public:
   ///
   Record *getInstructionSet() const;
 
+  /// getAllowRegisterRenaming - Return the AllowRegisterRenaming flag value for
+  /// this target.
+  ///
+  bool getAllowRegisterRenaming() const;
+
   /// getAsmParser - Return the AssemblyParser definition for this target.
   ///
   Record *getAsmParser() const;
@@ -140,11 +146,25 @@ public:
     return *I->second;
   }
 
-  /// getInstructionsByEnumValue - Return all of the instructions defined by the
-  /// target, ordered by their enum value.
-  ArrayRef<const CodeGenInstruction *>
-  getInstructionsByEnumValue() const {
-    if (InstrsByEnum.empty()) ComputeInstrsByEnum();
+  /// Returns the number of predefined instructions.
+  static unsigned getNumFixedInstructions();
+
+  /// Returns the number of pseudo instructions.
+  unsigned getNumPseudoInstructions() const {
+    if (InstrsByEnum.empty())
+      ComputeInstrsByEnum();
+    return NumPseudoInstructions;
+  }
+
+  /// Return all of the instructions defined by the target, ordered by their
+  /// enum value.
+  /// The following order of instructions is also guaranteed:
+  /// - fixed / generic instructions as declared in TargetOpcodes.def, in order;
+  /// - pseudo instructions in lexicographical order sorted by name;
+  /// - other instructions in lexicographical order sorted by name.
+  ArrayRef<const CodeGenInstruction *> getInstructionsByEnumValue() const {
+    if (InstrsByEnum.empty())
+      ComputeInstrsByEnum();
     return InstrsByEnum;
   }
 
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 9592ab7052f4..62a0ff700725 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -110,9 +110,11 @@ struct PatternSortingPredicate {
     if (LHSPatSize < RHSPatSize) return true;
     if (LHSPatSize > RHSPatSize) return false;
 
-    // Sort based on the UID of the pattern, giving us a deterministic ordering
-    // if all other sorting conditions fail.
-    assert(LHS == RHS || LHS->ID != RHS->ID);
+    // Sort based on the UID of the pattern, to reflect source order.
+    // Note that this is not guaranteed to be unique, since a single source
+    // pattern may have been resolved into multiple match patterns due to
+    // alternative fragments.  To ensure deterministic output, always use
+    // std::stable_sort with this predicate.
     return LHS->ID < RHS->ID;
   }
 };
@@ -137,13 +139,16 @@ void DAGISelEmitter::run(raw_ostream &OS) {
         "// When neither of the GET_DAGISEL* macros is defined, the functions\n"
         "// are emitted inline.\n\n";
 
-  DEBUG(errs() << "\n\nALL PATTERNS TO MATCH:\n\n";
-        for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(),
-             E = CGP.ptm_end(); I != E; ++I) {
-          errs() << "PATTERN: ";   I->getSrcPattern()->dump();
-          errs() << "\nRESULT:  "; I->getDstPattern()->dump();
-          errs() << "\n";
-        });
+  LLVM_DEBUG(errs() << "\n\nALL PATTERNS TO MATCH:\n\n";
+             for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(),
+                  E = CGP.ptm_end();
+                  I != E; ++I) {
+               errs() << "PATTERN: ";
+               I->getSrcPattern()->dump();
+               errs() << "\nRESULT:  ";
+               I->getDstPattern()->dump();
+               errs() << "\n";
+             });
 
   // Add all the patterns to a temporary list so we can sort them.
   std::vector<const PatternToMatch*> Patterns;
@@ -153,7 +158,8 @@ void DAGISelEmitter::run(raw_ostream &OS) {
 
   // We want to process the matches in order of minimal cost.  Sort the patterns
   // so the least cost one is at the start.
-  std::sort(Patterns.begin(), Patterns.end(), PatternSortingPredicate(CGP));
+  std::stable_sort(Patterns.begin(), Patterns.end(),
+                   PatternSortingPredicate(CGP));
 
 
   // Convert each variant of each pattern into a Matcher.
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index c672b0acac9f..ecc1f1dd094a 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -13,8 +13,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
 
 namespace llvm {
   struct CodeGenRegister;
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index a19b9e4b95c7..ce23651b9682 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -58,7 +58,7 @@ namespace {
     /// PatWithNoTypes - This is a clone of Pattern.getSrcPattern() that starts
     /// out with all of the types removed.  This allows us to insert type checks
     /// as we scan the tree.
-    TreePatternNode *PatWithNoTypes;
+    TreePatternNodePtr PatWithNoTypes;
 
     /// VariableMap - A map from variable names ('$dst') to the recorded operand
     /// number that they were captured as.  These are biased by 1 to make
@@ -101,10 +101,6 @@ namespace {
   public:
     MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp);
 
-    ~MatcherGen() {
-      delete PatWithNoTypes;
-    }
-
     bool EmitMatcherCode(unsigned Variant);
     void EmitResultCode();
 
@@ -134,10 +130,6 @@ namespace {
       return VarMapEntry-1;
     }
 
-    /// GetInstPatternNode - Get the pattern for an instruction.
-    const TreePatternNode *GetInstPatternNode(const DAGInstruction &Ins,
-                                              const TreePatternNode *N);
-
     void EmitResultOperand(const TreePatternNode *N,
                            SmallVectorImpl<unsigned> &ResultOps);
     void EmitResultOfNamedOperand(const TreePatternNode *N,
@@ -521,7 +513,8 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
   }
 
   // Emit the matcher for the pattern structure and types.
-  EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes, Pattern.ForceMode);
+  EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes.get(),
+                Pattern.ForceMode);
 
   // If the pattern has a predicate on it (e.g. only enabled when a subtarget
   // feature is around, do the check).
@@ -533,7 +526,7 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
   // because they are generally more expensive to evaluate and more difficult to
   // factor.
   for (unsigned i = 0, e = MatchedComplexPatterns.size(); i != e; ++i) {
-    const TreePatternNode *N = MatchedComplexPatterns[i].first;
+    auto N = MatchedComplexPatterns[i].first;
 
     // Remember where the results of this match get stuck.
     if (N->isLeaf()) {
@@ -664,28 +657,6 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
   N->dump();
 }
 
-/// GetInstPatternNode - Get the pattern for an instruction.
-///
-const TreePatternNode *MatcherGen::
-GetInstPatternNode(const DAGInstruction &Inst, const TreePatternNode *N) {
-  const TreePattern *InstPat = Inst.getPattern();
-
-  // FIXME2?: Assume actual pattern comes before "implicit".
-  TreePatternNode *InstPatNode;
-  if (InstPat)
-    InstPatNode = InstPat->getTree(0);
-  else if (/*isRoot*/ N == Pattern.getDstPattern())
-    InstPatNode = Pattern.getSrcPattern();
-  else
-    return nullptr;
-
-  if (InstPatNode && !InstPatNode->isLeaf() &&
-      InstPatNode->getOperator()->getName() == "set")
-    InstPatNode = InstPatNode->getChild(InstPatNode->getNumChildren()-1);
-
-  return InstPatNode;
-}
-
 static bool
 mayInstNodeLoadOrStore(const TreePatternNode *N,
                        const CodeGenDAGPatterns &CGP) {
@@ -723,25 +694,6 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
   CodeGenInstruction &II = CGT.getInstruction(Op);
   const DAGInstruction &Inst = CGP.getInstruction(Op);
 
-  // If we can, get the pattern for the instruction we're generating. We derive
-  // a variety of information from this pattern, such as whether it has a chain.
-  //
-  // FIXME2: This is extremely dubious for several reasons, not the least of
-  // which it gives special status to instructions with patterns that Pat<>
-  // nodes can't duplicate.
-  const TreePatternNode *InstPatNode = GetInstPatternNode(Inst, N);
-
-  // NodeHasChain - Whether the instruction node we're creating takes chains.
-  bool NodeHasChain = InstPatNode &&
-                      InstPatNode->TreeHasProperty(SDNPHasChain, CGP);
-
-  // Instructions which load and store from memory should have a chain,
-  // regardless of whether they happen to have an internal pattern saying so.
-  if (Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP)
-      && (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad ||
-          II.hasSideEffects))
-      NodeHasChain = true;
-
   bool isRoot = N == Pattern.getDstPattern();
 
   // TreeHasOutGlue - True if this tree has glue.
@@ -784,7 +736,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
       const DAGDefaultOperand &DefaultOp
         = CGP.getDefaultOperand(OperandNode);
       for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
-        EmitResultOperand(DefaultOp.DefaultOps[i], InstOps);
+        EmitResultOperand(DefaultOp.DefaultOps[i].get(), InstOps);
       continue;
     }
 
@@ -895,6 +847,26 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
                                              NumNodesThatLoadOrStore != 1));
   }
 
+  // Determine whether we need to attach a chain to this node.
+  bool NodeHasChain = false;
+  if (Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP)) {
+    // For some instructions, we were able to infer from the pattern whether
+    // they should have a chain.  Otherwise, attach the chain to the root.
+    //
+    // FIXME2: This is extremely dubious for several reasons, not the least of
+    // which is that it gives special status to instructions with patterns
+    // that Pat<> nodes can't duplicate.
+    if (II.hasChain_Inferred)
+      NodeHasChain = II.hasChain;
+    else
+      NodeHasChain = isRoot;
+    // Instructions which load and store from memory should have a chain,
+    // regardless of whether they happen to have a pattern saying so.
+    if (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad ||
+        II.hasSideEffects)
+      NodeHasChain = true;
+  }
+
   assert((!ResultVTs.empty() || TreeHasOutGlue || NodeHasChain) &&
          "Node has no result");
 
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index 0bb656826fbd..554c7438ce3d 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -293,15 +293,12 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
     if (Scan != e &&
         // Don't print it's obvious nothing extra could be merged anyway.
         Scan+1 != e) {
-      DEBUG(errs() << "Couldn't merge this:\n";
-            Optn->print(errs(), 4);
-            errs() << "into this:\n";
-            OptionsToMatch[Scan]->print(errs(), 4);
-            if (Scan+1 != e)
-              OptionsToMatch[Scan+1]->printOne(errs());
-            if (Scan+2 < e)
-              OptionsToMatch[Scan+2]->printOne(errs());
-            errs() << "\n");
+      LLVM_DEBUG(errs() << "Couldn't merge this:\n"; Optn->print(errs(), 4);
+                 errs() << "into this:\n";
+                 OptionsToMatch[Scan]->print(errs(), 4);
+                 if (Scan + 1 != e) OptionsToMatch[Scan + 1]->printOne(errs());
+                 if (Scan + 2 < e) OptionsToMatch[Scan + 2]->printOne(errs());
+                 errs() << "\n");
     }
     
     // If we only found one option starting with this matcher, no factoring is
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 1c1932a0144a..0db0f55f5ed6 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -278,30 +278,30 @@ public:
 // dbgsInsnClass - When debugging, print instruction class stages.
 //
 void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
-  DEBUG(dbgs() << "InsnClass: ");
+  LLVM_DEBUG(dbgs() << "InsnClass: ");
   for (unsigned i = 0; i < InsnClass.size(); ++i) {
     if (i > 0) {
-      DEBUG(dbgs() << ", ");
+      LLVM_DEBUG(dbgs() << ", ");
     }
-    DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
+    LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i]));
   }
   DFAInput InsnInput = getDFAInsnInput(InsnClass);
-  DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
+  LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")");
 }
 
 //
 // dbgsStateInfo - When debugging, print the set of state info.
 //
 void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
-  DEBUG(dbgs() << "StateInfo: ");
+  LLVM_DEBUG(dbgs() << "StateInfo: ");
   unsigned i = 0;
   for (std::set<unsigned>::iterator SI = stateInfo.begin();
        SI != stateInfo.end(); ++SI, ++i) {
     unsigned thisState = *SI;
     if (i > 0) {
-      DEBUG(dbgs() << ", ");
+      LLVM_DEBUG(dbgs() << ", ");
     }
-    DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState));
+    LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState));
   }
 }
 
@@ -310,7 +310,7 @@ void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
 //
 void dbgsIndent(unsigned indent) {
   for (unsigned i = 0; i < indent; ++i) {
-    DEBUG(dbgs() << " ");
+    LLVM_DEBUG(dbgs() << " ");
   }
 }
 #endif // NDEBUG
@@ -361,7 +361,8 @@ void State::AddInsnClass(std::vector<unsigned> &InsnClass,
 
     DenseSet<unsigned> VisitedResourceStates;
 
-    DEBUG(dbgs() << "  thisState: 0x" << Twine::utohexstr(thisState) << "\n");
+    LLVM_DEBUG(dbgs() << "  thisState: 0x" << Twine::utohexstr(thisState)
+                      << "\n");
     AddInsnClassStages(InsnClass, ComboBitToBitsMap,
                                 numstages - 1, numstages,
                                 thisState, thisState,
@@ -378,7 +379,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
   assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
   unsigned thisStage = InsnClass[chkstage];
 
-  DEBUG({
+  LLVM_DEBUG({
     dbgsIndent((1 + numstages - chkstage) << 1);
     dbgs() << "AddInsnClassStages " << chkstage << " (0x"
            << Twine::utohexstr(thisStage) << ") from ";
@@ -395,10 +396,10 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
     if (resourceMask & thisStage) {
       unsigned combo = ComboBitToBitsMap[resourceMask];
       if (combo && ((~prevState & combo) != combo)) {
-        DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState)
-                     << " - combo op 0x" << Twine::utohexstr(resourceMask)
-                     << " (0x" << Twine::utohexstr(combo)
-                     << ") cannot be scheduled\n");
+        LLVM_DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState)
+                          << " - combo op 0x" << Twine::utohexstr(resourceMask)
+                          << " (0x" << Twine::utohexstr(combo)
+                          << ") cannot be scheduled\n");
         continue;
       }
       //
@@ -406,7 +407,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
       // resource state if that resource was used.
       //
       unsigned ResultingResourceState = prevState | resourceMask | combo;
-      DEBUG({
+      LLVM_DEBUG({
         dbgsIndent((2 + numstages - chkstage) << 1);
         dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x"
                << Twine::utohexstr(resourceMask);
@@ -433,13 +434,15 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
           if (VisitedResourceStates.count(ResultingResourceState) == 0) {
             VisitedResourceStates.insert(ResultingResourceState);
             PossibleStates.insert(ResultingResourceState);
-            DEBUG(dbgs() << "\tResultingResourceState: 0x"
-                         << Twine::utohexstr(ResultingResourceState) << "\n");
+            LLVM_DEBUG(dbgs()
+                       << "\tResultingResourceState: 0x"
+                       << Twine::utohexstr(ResultingResourceState) << "\n");
           } else {
-            DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
+            LLVM_DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
           }
         } else {
-          DEBUG(dbgs() << "\tSkipped Add - no final resources available\n");
+          LLVM_DEBUG(dbgs()
+                     << "\tSkipped Add - no final resources available\n");
         }
       } else {
         //
@@ -447,13 +450,13 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
         // stage in InsnClass for available resources.
         //
         if (ResultingResourceState != prevState) {
-          DEBUG(dbgs() << "\n");
+          LLVM_DEBUG(dbgs() << "\n");
           AddInsnClassStages(InsnClass, ComboBitToBitsMap,
                                 chkstage - 1, numstages,
                                 ResultingResourceState, origState,
                                 VisitedResourceStates, PossibleStates);
         } else {
-          DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
+          LLVM_DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
         }
       }
     }
@@ -494,10 +497,11 @@ bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
       //       These cases are caught later in AddInsnClass.
       unsigned combo = ComboBitToBitsMap[InsnClass[i]];
       if (combo && ((~resources & combo) != combo)) {
-        DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x"
-                     << Twine::utohexstr(resources) << " - combo op 0x"
-                     << Twine::utohexstr(InsnClass[i]) << " (0x"
-                     << Twine::utohexstr(combo) << ") cannot be scheduled\n");
+        LLVM_DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x"
+                          << Twine::utohexstr(resources) << " - combo op 0x"
+                          << Twine::utohexstr(InsnClass[i]) << " (0x"
+                          << Twine::utohexstr(combo)
+                          << ") cannot be scheduled\n");
         available = false;
         break;
       }
@@ -537,9 +541,10 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
                            int maxResources, int numCombos, int maxStages) {
   unsigned numStates = states.size();
 
-  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
-  DEBUG(dbgs() << "writeTableAndAPI\n");
-  DEBUG(dbgs() << "Total states: " << numStates << "\n");
+  LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
+                       "----------------------\n");
+  LLVM_DEBUG(dbgs() << "writeTableAndAPI\n");
+  LLVM_DEBUG(dbgs() << "Total states: " << numStates << "\n");
 
   OS << "namespace llvm {\n";
 
@@ -647,9 +652,10 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
                             std::map<std::string, unsigned> &FUNameToBitsMap,
                             int &maxFUs,
                             raw_ostream &OS) {
-  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
-  DEBUG(dbgs() << "collectAllFuncUnits");
-  DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
+  LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
+                       "----------------------\n");
+  LLVM_DEBUG(dbgs() << "collectAllFuncUnits");
+  LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
 
   int totalFUs = 0;
   // Parse functional units for all the itineraries.
@@ -657,10 +663,8 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
     Record *Proc = ProcItinList[i];
     std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
 
-    DEBUG(dbgs() << "    FU:" << i
-                 << " (" << FUs.size() << " FUs) "
-                 << Proc->getName());
-
+    LLVM_DEBUG(dbgs() << "    FU:" << i << " (" << FUs.size() << " FUs) "
+                      << Proc->getName());
 
     // Convert macros to bits for each stage.
     unsigned numFUs = FUs.size();
@@ -669,14 +673,14 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
                       "Exceeded maximum number of representable resources");
       unsigned FuncResources = (unsigned) (1U << j);
       FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
-      DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
-                   << Twine::utohexstr(FuncResources));
+      LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
+                        << Twine::utohexstr(FuncResources));
     }
     if (((int) numFUs) > maxFUs) {
       maxFUs = numFUs;
     }
     totalFUs += numFUs;
-    DEBUG(dbgs() << "\n");
+    LLVM_DEBUG(dbgs() << "\n");
   }
   return totalFUs;
 }
@@ -690,18 +694,18 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
                             std::map<std::string, unsigned> &FUNameToBitsMap,
                             std::map<unsigned, unsigned> &ComboBitToBitsMap,
                             raw_ostream &OS) {
-  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
-  DEBUG(dbgs() << "collectAllComboFuncs");
-  DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
+  LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
+                       "----------------------\n");
+  LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
+  LLVM_DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
 
   int numCombos = 0;
   for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
     Record *Func = ComboFuncList[i];
     std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");
 
-    DEBUG(dbgs() << "    CFD:" << i
-                 << " (" << FUs.size() << " combo FUs) "
-                 << Func->getName() << "\n");
+    LLVM_DEBUG(dbgs() << "    CFD:" << i << " (" << FUs.size() << " combo FUs) "
+                      << Func->getName() << "\n");
 
     // Convert macros to bits for each stage.
     for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
@@ -714,20 +718,20 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
       const std::string &ComboFuncName = ComboFunc->getName();
       unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
       unsigned ComboResources = ComboBit;
-      DEBUG(dbgs() << "      combo: " << ComboFuncName << ":0x"
-                   << Twine::utohexstr(ComboResources) << "\n");
+      LLVM_DEBUG(dbgs() << "      combo: " << ComboFuncName << ":0x"
+                        << Twine::utohexstr(ComboResources) << "\n");
       for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
         std::string FuncName = FuncList[k]->getName();
         unsigned FuncResources = FUNameToBitsMap[FuncName];
-        DEBUG(dbgs() << "        " << FuncName << ":0x"
-                     << Twine::utohexstr(FuncResources) << "\n");
+        LLVM_DEBUG(dbgs() << "        " << FuncName << ":0x"
+                          << Twine::utohexstr(FuncResources) << "\n");
         ComboResources |= FuncResources;
       }
       ComboBitToBitsMap[ComboBit] = ComboResources;
       numCombos++;
-      DEBUG(dbgs() << "          => combo bits: " << ComboFuncName << ":0x"
-                   << Twine::utohexstr(ComboBit) << " = 0x"
-                   << Twine::utohexstr(ComboResources) << "\n");
+      LLVM_DEBUG(dbgs() << "          => combo bits: " << ComboFuncName << ":0x"
+                        << Twine::utohexstr(ComboBit) << " = 0x"
+                        << Twine::utohexstr(ComboResources) << "\n");
     }
   }
   return numCombos;
@@ -747,8 +751,8 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
   // The number of stages.
   unsigned NStages = StageList.size();
 
-  DEBUG(dbgs() << "    " << ItinData->getValueAsDef("TheClass")->getName()
-               << "\n");
+  LLVM_DEBUG(dbgs() << "    " << ItinData->getValueAsDef("TheClass")->getName()
+                    << "\n");
 
   std::vector<unsigned> UnitBits;
 
@@ -760,8 +764,8 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
     const std::vector<Record*> &UnitList =
       Stage->getValueAsListOfDefs("Units");
 
-    DEBUG(dbgs() << "        stage:" << i
-                 << " [" << UnitList.size() << " units]:");
+    LLVM_DEBUG(dbgs() << "        stage:" << i << " [" << UnitList.size()
+                      << " units]:");
     unsigned dbglen = 26;  // cursor after stage dbgs
 
     // Compute the bitwise or of each unit used in this stage.
@@ -769,7 +773,7 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
     for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
       // Conduct bitwise or.
       std::string UnitName = UnitList[j]->getName();
-      DEBUG(dbgs() << " " << j << ":" << UnitName);
+      LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName);
       dbglen += 3 + UnitName.length();
       assert(FUNameToBitsMap.count(UnitName));
       UnitBitValue |= FUNameToBitsMap[UnitName];
@@ -780,15 +784,16 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
 
     while (dbglen <= 64) {   // line up bits dbgs
         dbglen += 8;
-        DEBUG(dbgs() << "\t");
+        LLVM_DEBUG(dbgs() << "\t");
     }
-    DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue) << ")\n");
+    LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue)
+                      << ")\n");
   }
 
   if (!UnitBits.empty())
     allInsnClasses.push_back(UnitBits);
 
-  DEBUG({
+  LLVM_DEBUG({
     dbgs() << "        ";
     dbgsInsnClass(UnitBits);
     dbgs() << "\n";
@@ -811,10 +816,10 @@ int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
   unsigned M = ItinDataList.size();
 
   int numInsnClasses = 0;
-  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"
-               << "collectAllInsnClasses "
-               << ProcName
-               << " (" << M << " classes)\n");
+  LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
+                       "----------------------\n"
+                    << "collectAllInsnClasses " << ProcName << " (" << M
+                    << " classes)\n");
 
   // Collect stages for each instruction class for all itinerary data
   for (unsigned j = 0; j < M; j++) {
@@ -914,7 +919,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
   //
   while (!WorkList.empty()) {
     const State *current = WorkList.pop_back_val();
-    DEBUG({
+    LLVM_DEBUG({
       dbgs() << "---------------------\n";
       dbgs() << "Processing state: " << current->stateNum << " - ";
       dbgsStateInfo(current->stateInfo);
@@ -922,7 +927,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
     });
     for (unsigned i = 0; i < allInsnClasses.size(); i++) {
       std::vector<unsigned> InsnClass = allInsnClasses[i];
-      DEBUG({
+      LLVM_DEBUG({
         dbgs() << i << " ";
         dbgsInsnClass(InsnClass);
         dbgs() << "\n";
@@ -938,11 +943,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
         const State *NewState = nullptr;
         current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
         if (NewStateResources.empty()) {
-          DEBUG(dbgs() << "  Skipped - no new states generated\n");
+          LLVM_DEBUG(dbgs() << "  Skipped - no new states generated\n");
           continue;
         }
 
-        DEBUG({
+        LLVM_DEBUG({
           dbgs() << "\t";
           dbgsStateInfo(NewStateResources);
           dbgs() << "\n";
@@ -954,7 +959,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
         auto VI = Visited.find(NewStateResources);
         if (VI != Visited.end()) {
           NewState = VI->second;
-          DEBUG({
+          LLVM_DEBUG({
             dbgs() << "\tFound existing state: " << NewState->stateNum
                    << " - ";
             dbgsStateInfo(NewState->stateInfo);
@@ -965,7 +970,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
           NewState->stateInfo = NewStateResources;
           Visited[NewStateResources] = NewState;
           WorkList.push_back(NewState);
-          DEBUG({
+          LLVM_DEBUG({
             dbgs() << "\tAccepted new state: " << NewState->stateNum << " - ";
             dbgsStateInfo(NewState->stateInfo);
             dbgs() << "\n";
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 6e1d8dde981c..b99a0a973a2c 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenTarget.h"
+#include "WebAssemblyDisassemblerEmitter.h"
 #include "X86DisassemblerTables.h"
 #include "X86RecognizableInstr.h"
 #include "llvm/TableGen/Error.h"
@@ -74,8 +75,8 @@ using namespace llvm::X86Disassembler;
 ///     accurate.  Sometimes they are not.
 /// (3) to fix the tables to reflect the actual context (for example, required
 ///     prefixes), and possibly to add a new context by editing
-///     lib/Target/X86/X86DisassemblerDecoderCommon.h.  This is unlikely to be
-///     the cause.
+///     include/llvm/Support/X86DisassemblerDecoderCommon.h.  This is unlikely
+///     to be the cause.
 ///
 /// DisassemblerEmitter.cpp contains the implementation for the emitter,
 ///   which simply pulls out instructions from the CodeGenTarget and pushes them
@@ -125,6 +126,14 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
     return;
   }
 
+  // WebAssembly has variable-length opcodes, so it can't use
+  // EmitFixedLenDecoder below (which depends on a Size table-gen Record); it
+  // also uses a custom disassembler.
+  if (Target.getName() == "WebAssembly") {
+    emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue());
+    return;
+  }
+
   // ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
   if (Target.getName() == "ARM" || Target.getName() == "Thumb" ||
       Target.getName() == "AArch64" || Target.getName() == "ARM64") {
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 610f4d21bf2d..c0902e4c6f1a 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -36,8 +36,18 @@ struct InstructionMemo {
   std::string Name;
   const CodeGenRegisterClass *RC;
   std::string SubRegNo;
-  std::vector<std::string>* PhysRegs;
+  std::vector<std::string> PhysRegs;
   std::string PredicateCheck;
+
+  InstructionMemo(std::string Name, const CodeGenRegisterClass *RC,
+                  std::string SubRegNo, std::vector<std::string> PhysRegs,
+                  std::string PredicateCheck)
+    : Name(Name), RC(RC), SubRegNo(SubRegNo), PhysRegs(PhysRegs),
+      PredicateCheck(PredicateCheck) {}
+
+  // Make sure we do not copy InstructionMemo.
+  InstructionMemo(const InstructionMemo &Other) = delete;
+  InstructionMemo(InstructionMemo &&Other) = default;
 };
 } // End anonymous namespace
 
@@ -453,6 +463,13 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
     if (II.Operands.empty())
       continue;
 
+    // Allow instructions to be marked as unavailable for FastISel in
+    // certain cases, e.g. when an ISA has two 'and' instructions which
+    // differ in what registers they can use but are otherwise identical
+    // for codegen purposes.
+    if (II.FastISelShouldIgnore)
+      continue;
+
     // For now, ignore multi-instruction patterns.
     bool MultiInsts = false;
     for (unsigned i = 0, e = Dst->getNumChildren(); i != e; ++i) {
@@ -520,10 +537,10 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
                              DstRC))
       continue;
 
-    std::vector<std::string>* PhysRegInputs = new std::vector<std::string>();
+    std::vector<std::string> PhysRegInputs;
     if (InstPatNode->getOperator()->getName() == "imm" ||
         InstPatNode->getOperator()->getName() == "fpimm")
-      PhysRegInputs->push_back("");
+      PhysRegInputs.push_back("");
     else {
       // Compute the PhysRegs used by the given pattern, and check that
       // the mapping from the src to dst patterns is simple.
@@ -541,7 +558,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
           ++DstIndex;
         }
 
-        PhysRegInputs->push_back(PhysReg);
+        PhysRegInputs.push_back(PhysReg);
       }
 
       if (Op->getName() != "EXTRACT_SUBREG" && DstIndex < Dst->getNumChildren())
@@ -565,13 +582,13 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
     std::string PredicateCheck = Pattern.getPredicateCheck();
 
     // Ok, we found a pattern that we can handle. Remember it.
-    InstructionMemo Memo = {
+    InstructionMemo Memo(
       Pattern.getDstPattern()->getOperator()->getName(),
       DstRC,
       SubRegNo,
       PhysRegInputs,
       PredicateCheck
-    };
+    );
 
     int complexity = Pattern.getPatternComplexity(CGP);
 
@@ -585,8 +602,8 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
 
        // Note: Instructions with the same complexity will appear in the order
           // that they are encountered.
-    SimplePatterns[Operands][OpcodeName][VT][RetVT].insert(
-      std::make_pair(complexity, Memo));
+    SimplePatterns[Operands][OpcodeName][VT][RetVT].emplace(complexity,
+                                                            std::move(Memo));
 
     // If any of the operands were immediates with predicates on them, strip
     // them down to a signature that doesn't have predicates so that we can
@@ -641,22 +658,22 @@ void FastISelMap::emitInstructionCode(raw_ostream &OS,
       OS << "  ";
     }
 
-    for (unsigned i = 0; i < Memo.PhysRegs->size(); ++i) {
-      if ((*Memo.PhysRegs)[i] != "")
+    for (unsigned i = 0; i < Memo.PhysRegs.size(); ++i) {
+      if (Memo.PhysRegs[i] != "")
         OS << "  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, "
-           << "TII.get(TargetOpcode::COPY), "
-           << (*Memo.PhysRegs)[i] << ").addReg(Op" << i << ");\n";
+           << "TII.get(TargetOpcode::COPY), " << Memo.PhysRegs[i]
+           << ").addReg(Op" << i << ");\n";
     }
 
     OS << "  return fastEmitInst_";
     if (Memo.SubRegNo.empty()) {
-      Operands.PrintManglingSuffix(OS, *Memo.PhysRegs,
-     ImmediatePredicates, true);
+      Operands.PrintManglingSuffix(OS, Memo.PhysRegs, ImmediatePredicates,
+                                   true);
       OS << "(" << InstNS << "::" << Memo.Name << ", ";
       OS << "&" << InstNS << "::" << Memo.RC->getName() << "RegClass";
       if (!Operands.empty())
         OS << ", ";
-      Operands.PrintArguments(OS, *Memo.PhysRegs);
+      Operands.PrintArguments(OS, Memo.PhysRegs);
       OS << ");\n";
     } else {
       OS << "extractsubreg(" << RetVTName
@@ -811,7 +828,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
       = SignaturesWithConstantForms.find(Operands);
     if (MI != SignaturesWithConstantForms.end()) {
       // Unique any duplicates out of the list.
-      std::sort(MI->second.begin(), MI->second.end());
+      llvm::sort(MI->second.begin(), MI->second.end());
       MI->second.erase(std::unique(MI->second.begin(), MI->second.end()),
                        MI->second.end());
 
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 03930d7132df..76ba1c001092 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -606,12 +606,13 @@ static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
     // NumToSkip entry itself, so subtract two  from the displacement here
     // to account for that.
     uint32_t FixupIdx = *I;
-    uint32_t Delta = DestIdx - FixupIdx - 2;
-    // Our NumToSkip entries are 16-bits. Make sure our table isn't too
+    uint32_t Delta = DestIdx - FixupIdx - 3;
+    // Our NumToSkip entries are 24-bits. Make sure our table isn't too
     // big.
-    assert(Delta < 65536U && "disassembler decoding table too large!");
+    assert(Delta < (1u << 24));
     Table[FixupIdx] = (uint8_t)Delta;
     Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
+    Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
   }
 }
 
@@ -646,7 +647,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
     } else {
       Table.push_back(MCD::OPC_FilterValue);
       // Encode and emit the value to filter against.
-      uint8_t Buffer[8];
+      uint8_t Buffer[16];
       unsigned Len = encodeULEB128(Filter.first, Buffer);
       Table.insert(Table.end(), Buffer, Buffer + Len);
       // Reserve space for the NumToSkip entry. We'll backpatch the value
@@ -654,6 +655,7 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
       PrevFilter = Table.size();
       Table.push_back(0);
       Table.push_back(0);
+      Table.push_back(0);
     }
 
     // We arrive at a category of instructions with the same segment value.
@@ -666,10 +668,11 @@ void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
     // of the filter itself to be able to skip forward when false. Subtract
     // two as to account for the width of the NumToSkip field itself.
     if (PrevFilter) {
-      uint32_t NumToSkip = Table.size() - PrevFilter - 2;
-      assert(NumToSkip < 65536U && "disassembler decoding table too large!");
+      uint32_t NumToSkip = Table.size() - PrevFilter - 3;
+      assert(NumToSkip < (1u << 24) && "disassembler decoding table too large!");
       Table[PrevFilter] = (uint8_t)NumToSkip;
       Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
+      Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16);
     }
   }
 
@@ -745,13 +748,16 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
         OS << (unsigned)*I++ << ", ";
       OS << (unsigned)*I++ << ", ";
 
-      // 16-bit numtoskip value.
+      // 24-bit numtoskip value.
       uint8_t Byte = *I++;
       uint32_t NumToSkip = Byte;
       OS << (unsigned)Byte << ", ";
       Byte = *I++;
       OS << (unsigned)Byte << ", ";
       NumToSkip |= Byte << 8;
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 16;
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
     }
@@ -765,13 +771,16 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
       for (; *I >= 128; ++I)
         OS << (unsigned)*I << ", ";
       OS << (unsigned)*I++ << ", ";
-      // 16-bit numtoskip value.
+      // 24-bit numtoskip value.
       uint8_t Byte = *I++;
       uint32_t NumToSkip = Byte;
       OS << (unsigned)Byte << ", ";
       Byte = *I++;
       OS << (unsigned)Byte << ", ";
       NumToSkip |= Byte << 8;
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 16;
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
     }
@@ -782,13 +791,16 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
         OS << (unsigned)*I << ", ";
       OS << (unsigned)*I++ << ", ";
 
-      // 16-bit numtoskip value.
+      // 24-bit numtoskip value.
       uint8_t Byte = *I++;
       uint32_t NumToSkip = Byte;
       OS << (unsigned)Byte << ", ";
       Byte = *I++;
       OS << (unsigned)Byte << ", ";
       NumToSkip |= Byte << 8;
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 16;
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
     }
@@ -797,7 +809,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
       bool IsTry = *I == MCD::OPC_TryDecode;
       ++I;
       // Extract the ULEB128 encoded Opcode to a buffer.
-      uint8_t Buffer[8], *p = Buffer;
+      uint8_t Buffer[16], *p = Buffer;
       while ((*p++ = *I++) >= 128)
         assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer)
                && "ULEB128 value too large!");
@@ -822,13 +834,16 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
 
       // Fallthrough for OPC_TryDecode.
 
-      // 16-bit numtoskip value.
+      // 24-bit numtoskip value.
       uint8_t Byte = *I++;
       uint32_t NumToSkip = Byte;
       OS << (unsigned)Byte << ", ";
       Byte = *I++;
       OS << (unsigned)Byte << ", ";
       NumToSkip |= Byte << 8;
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 16;
 
       OS << "// Opcode: "
          << NumberedInstructions[Opc]->TheDef->getName()
@@ -1226,6 +1241,7 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
   TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
   TableInfo.Table.push_back(0);
   TableInfo.Table.push_back(0);
+  TableInfo.Table.push_back(0);
 }
 
 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
@@ -1311,18 +1327,19 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
     TableInfo.Table.push_back(MCD::OPC_CheckField);
     TableInfo.Table.push_back(StartBits[I-1]);
     TableInfo.Table.push_back(NumBits);
-    uint8_t Buffer[8], *p;
+    uint8_t Buffer[16], *p;
     encodeULEB128(FieldVals[I-1], Buffer);
     for (p = Buffer; *p >= 128 ; ++p)
       TableInfo.Table.push_back(*p);
     TableInfo.Table.push_back(*p);
     // Push location for NumToSkip backpatching.
     TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
-    // The fixup is always 16-bits, so go ahead and allocate the space
+    // The fixup is always 24-bits, so go ahead and allocate the space
     // in the table so all our relative position calculations work OK even
     // before we fully resolve the real value here.
     TableInfo.Table.push_back(0);
     TableInfo.Table.push_back(0);
+    TableInfo.Table.push_back(0);
   }
 
   // Check for soft failure of the match.
@@ -1342,7 +1359,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
   // can decode it.
   TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
       MCD::OPC_TryDecode);
-  uint8_t Buffer[8], *p;
+  uint8_t Buffer[16], *p;
   encodeULEB128(Opc, Buffer);
   for (p = Buffer; *p >= 128 ; ++p)
     TableInfo.Table.push_back(*p);
@@ -1362,6 +1379,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
     // Allocate the space for the fixup.
     TableInfo.Table.push_back(0);
     TableInfo.Table.push_back(0);
+    TableInfo.Table.push_back(0);
   }
 }
 
@@ -1701,10 +1719,9 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
 static std::string findOperandDecoderMethod(TypedInit *TI) {
   std::string Decoder;
 
-  RecordRecTy *Type = cast<RecordRecTy>(TI->getType());
-  Record *TypeRecord = Type->getRecord();
+  Record *Record = cast<DefInit>(TI)->getDef();
 
-  RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+  RecordVal *DecoderString = Record->getValue("DecoderMethod");
   StringInit *String = DecoderString ?
     dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
   if (String) {
@@ -1713,14 +1730,14 @@ static std::string findOperandDecoderMethod(TypedInit *TI) {
       return Decoder;
   }
 
-  if (TypeRecord->isSubClassOf("RegisterOperand"))
-    TypeRecord = TypeRecord->getValueAsDef("RegClass");
+  if (Record->isSubClassOf("RegisterOperand"))
+    Record = Record->getValueAsDef("RegClass");
 
-  if (TypeRecord->isSubClassOf("RegisterClass")) {
-    Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
-  } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
+  if (Record->isSubClassOf("RegisterClass")) {
+    Decoder = "Decode" + Record->getName().str() + "RegisterClass";
+  } else if (Record->isSubClassOf("PointerLikeRegClass")) {
     Decoder = "DecodePointerLikeRegClass" +
-      utostr(TypeRecord->getValueAsInt("RegClassKind"));
+      utostr(Record->getValueAsInt("RegClassKind"));
   }
 
   return Decoder;
@@ -1860,9 +1877,9 @@ static bool populateInstruction(CodeGenTarget &Target,
         CGI.Operands.getSubOperandNumber(OpIdx);
       const std::string &Name = CGI.Operands[SO.first].Name;
 
-      DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName() << ": " <<
-                      Name << "(" << SO.first << ", " << SO.second << ") => " <<
-                      Vals[i].getName() << "\n");
+      LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
+                        << ": " << Name << "(" << SO.first << ", " << SO.second
+                        << ") => " << Vals[i].getName() << "\n");
 
       std::string Decoder;
       Record *TypeRecord = CGI.Operands[SO.first].Rec;
@@ -1878,10 +1895,8 @@ static bool populateInstruction(CodeGenTarget &Target,
           CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
         Init *Arg = CGI.Operands[SO.first].MIOperandInfo->
                       getArg(SO.second);
-        if (TypedInit *TI = cast<TypedInit>(Arg)) {
-          RecordRecTy *Type = cast<RecordRecTy>(TI->getType());
-          TypeRecord = Type->getRecord();
-        }
+        if (DefInit *DI = cast<DefInit>(Arg))
+          TypeRecord = DI->getDef();
       }
 
       bool isReg = false;
@@ -1959,7 +1974,7 @@ static bool populateInstruction(CodeGenTarget &Target,
     // to interpret it.  As a first step, require the target to provide
     // callbacks for decoding register classes.
     std::string Decoder = findOperandDecoderMethod(TI);
-    Record *TypeRecord = cast<RecordRecTy>(TI->getType())->getRecord();
+    Record *TypeRecord = cast<DefInit>(TI)->getDef();
 
     RecordVal *HasCompleteDecoderVal =
       TypeRecord->getValue("hasCompleteDecoder");
@@ -2026,7 +2041,7 @@ static bool populateInstruction(CodeGenTarget &Target,
   Operands[Opc] = InsnOperands;
 
 #if 0
-  DEBUG({
+  LLVM_DEBUG({
       // Dumps the instruction encoding bits.
       dumpBits(errs(), Bits);
 
@@ -2048,10 +2063,16 @@ static bool populateInstruction(CodeGenTarget &Target,
 
 // emitFieldFromInstruction - Emit the templated helper function
 // fieldFromInstruction().
+// On Windows we make sure that this function is not inlined when
+// using the VS compiler. It has a bug which causes the function
+// to be optimized out in some circumstances. See llvm.org/pr38292
 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
   OS << "// Helper function for extracting fields from encoded instructions.\n"
      << "template<typename InsnType>\n"
-   << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
+     << "#if defined(_MSC_VER) && !defined(__clang__)\n"
+     << "__declspec(noinline)\n"
+     << "#endif\n"
+     << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
      << "                                     unsigned numBits) {\n"
      << "    assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n"
      << "           \"Instruction field out of bounds!\");\n"
@@ -2068,8 +2089,10 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
 // decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS) {
   OS << "template<typename InsnType>\n"
-     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,\n"
-     << "                                      InsnType insn, uint64_t Address,\n"
+     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], "
+        "MCInst &MI,\n"
+     << "                                      InsnType insn, uint64_t "
+        "Address,\n"
      << "                                      const void *DisAsm,\n"
      << "                                      const MCSubtargetInfo &STI) {\n"
      << "  const FeatureBitset& Bits = STI.getFeatureBits();\n"
@@ -2088,7 +2111,8 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      unsigned Len = *++Ptr;\n"
      << "      ++Ptr;\n"
      << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << \", \"\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << "
+        "\", \"\n"
      << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
      << "      break;\n"
      << "    }\n"
@@ -2097,16 +2121,20 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      unsigned Len;\n"
      << "      InsnType Val = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
      << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
      << "\n"
      << "      // Perform the filter operation.\n"
      << "      if (Val != CurFieldValue)\n"
      << "        Ptr += NumToSkip;\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << \", \" << NumToSkip\n"
-     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" : \"PASS:\")\n"
-     << "                   << \" continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << "
+        "\", \" << NumToSkip\n"
+     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" "
+        ": \"PASS:\")\n"
+     << "                   << \" continuing at \" << (Ptr - DecodeTable) << "
+        "\"\\n\");\n"
      << "\n"
      << "      break;\n"
      << "    }\n"
@@ -2117,18 +2145,23 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      // Decode the field value.\n"
      << "      uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
      << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
      << "\n"
      << "      // If the actual and expected values don't match, skip.\n"
      << "      if (ExpectedValue != FieldValue)\n"
      << "        Ptr += NumToSkip;\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << \", \"\n"
-     << "                   << Len << \", \" << ExpectedValue << \", \" << NumToSkip\n"
-     << "                   << \"): FieldValue = \" << FieldValue << \", ExpectedValue = \"\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << "
+        "\", \"\n"
+     << "                   << Len << \", \" << ExpectedValue << \", \" << "
+        "NumToSkip\n"
+     << "                   << \"): FieldValue = \" << FieldValue << \", "
+        "ExpectedValue = \"\n"
      << "                   << ExpectedValue << \": \"\n"
-     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : "
+        "\"FAIL\\n\"));\n"
      << "      break;\n"
      << "    }\n"
      << "    case MCD::OPC_CheckPredicate: {\n"
@@ -2136,15 +2169,17 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      // Decode the Predicate Index value.\n"
      << "      unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
      << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
      << "      // Check the predicate.\n"
      << "      bool Pred;\n"
      << "      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
      << "        Ptr += NumToSkip;\n"
      << "      (void)Pred;\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx << \"): \"\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx "
+        "<< \"): \"\n"
      << "            << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
      << "\n"
      << "      break;\n"
@@ -2160,12 +2195,14 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      MI.clear();\n"
      << "      MI.setOpcode(Opc);\n"
      << "      bool DecodeComplete;\n"
-     << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);\n"
+     << "      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, "
+        "DecodeComplete);\n"
      << "      assert(DecodeComplete);\n"
      << "\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
      << "                   << \", using decoder \" << DecodeIdx << \": \"\n"
-     << "                   << (S != MCDisassembler::Fail ? \"PASS\" : \"FAIL\") << \"\\n\");\n"
+     << "                   << (S != MCDisassembler::Fail ? \"PASS\" : "
+        "\"FAIL\") << \"\\n\");\n"
      << "      return S;\n"
      << "    }\n"
      << "    case MCD::OPC_TryDecode: {\n"
@@ -2175,29 +2212,35 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      Ptr += Len;\n"
      << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
      << "      Ptr += Len;\n"
-     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      // NumToSkip is a plain 24-bit integer.\n"
      << "      unsigned NumToSkip = *Ptr++;\n"
      << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      NumToSkip |= (*Ptr++) << 16;\n"
      << "\n"
      << "      // Perform the decode operation.\n"
      << "      MCInst TmpMI;\n"
      << "      TmpMI.setOpcode(Opc);\n"
      << "      bool DecodeComplete;\n"
-     << "      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << Opc\n"
+     << "      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, "
+        "DecodeComplete);\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << "
+        "Opc\n"
      << "                   << \", using decoder \" << DecodeIdx << \": \");\n"
      << "\n"
      << "      if (DecodeComplete) {\n"
      << "        // Decoding complete.\n"
-     << "        DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : \"FAIL\") << \"\\n\");\n"
+     << "        LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : "
+        "\"FAIL\") << \"\\n\");\n"
      << "        MI = TmpMI;\n"
      << "        return S;\n"
      << "      } else {\n"
      << "        assert(S == MCDisassembler::Fail);\n"
      << "        // If the decoding was incomplete, skip.\n"
      << "        Ptr += NumToSkip;\n"
-     << "        DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n"
-     << "        // Reset decode status. This also drops a SoftFail status that could be\n"
+     << "        LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - "
+        "DecodeTable) << \"\\n\");\n"
+     << "        // Reset decode status. This also drops a SoftFail status "
+        "that could be\n"
      << "        // set before the decode attempt.\n"
      << "        S = MCDisassembler::Success;\n"
      << "      }\n"
@@ -2213,16 +2256,18 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
      << "      bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n"
      << "      if (Fail)\n"
      << "        S = MCDisassembler::SoftFail;\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? \"FAIL\\n\":\"PASS\\n\"));\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? "
+        "\"FAIL\\n\":\"PASS\\n\"));\n"
      << "      break;\n"
      << "    }\n"
      << "    case MCD::OPC_Fail: {\n"
-     << "      DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
+     << "      LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
      << "      return MCDisassembler::Fail;\n"
      << "    }\n"
      << "    }\n"
      << "  }\n"
-     << "  llvm_unreachable(\"bogosity detected in disassembler state machine!\");\n"
+     << "  llvm_unreachable(\"bogosity detected in disassembler state "
+        "machine!\");\n"
      << "}\n\n";
 }
 
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index c7d662db5a2f..69726cc9f257 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -35,11 +35,11 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Support/CodeGenCoverage.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -84,6 +84,8 @@ namespace {
 
 /// Get the name of the enum value used to number the predicate function.
 std::string getEnumNameForPredicate(const TreePredicateFn &Predicate) {
+  if (Predicate.hasGISelPredicateCode())
+    return "GIPFP_MI_" + Predicate.getFnName();
   return "GIPFP_" + Predicate.getImmTypeIdentifier().str() + "_" +
          Predicate.getFnName();
 }
@@ -100,6 +102,7 @@ private:
   LLT Ty;
 
 public:
+  LLTCodeGen() = default;
   LLTCodeGen(const LLT &Ty) : Ty(Ty) {}
 
   std::string getCxxEnumValue() const {
@@ -148,7 +151,7 @@ public:
 
   const LLT &get() const { return Ty; }
 
-  /// This ordering is used for std::unique() and std::sort(). There's no
+  /// This ordering is used for std::unique() and llvm::sort(). There's no
   /// particular logic behind the order but either A < B or B < A must be
   /// true if A != B.
   bool operator<(const LLTCodeGen &Other) const {
@@ -176,6 +179,9 @@ public:
   bool operator==(const LLTCodeGen &B) const { return Ty == B.Ty; }
 };
 
+// Track all types that are used so we can emit the corresponding enum.
+std::set<LLTCodeGen> KnownTypes;
+
 class InstructionMatcher;
 /// Convert an MVT to an equivalent LLT if possible, or the invalid LLT() for
 /// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...).
@@ -260,6 +266,11 @@ std::string explainOperator(Record *Operator) {
             ")")
         .str();
 
+  if (Operator->isSubClassOf("SDNodeXForm"))
+    return (" (Operator is an unmapped SDNodeXForm, " + Operator->getName() +
+            ")")
+        .str();
+
   return (" (Operator " + Operator->getName() + " not understood)").str();
 }
 
@@ -280,12 +291,16 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
     if (Predicate.isImmediatePattern())
       continue;
 
-    if (Predicate.isNonExtLoad())
+    if (Predicate.isNonExtLoad() || Predicate.isAnyExtLoad() ||
+        Predicate.isSignExtLoad() || Predicate.isZeroExtLoad())
       continue;
 
     if (Predicate.isNonTruncStore())
       continue;
 
+    if (Predicate.isLoad() && Predicate.getMemoryVT())
+      continue;
+
     if (Predicate.isLoad() || Predicate.isStore()) {
       if (Predicate.isUnindexed())
         continue;
@@ -306,6 +321,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
          Predicate.isAtomicOrderingWeakerThanRelease()))
       continue;
 
+    if (Predicate.hasGISelPredicateCode())
+      continue;
+
     HasUnsupportedPredicate = true;
     Explanation = Separator + "Has a predicate (" + explainPredicates(N) + ")";
     Separator = ", ";
@@ -315,12 +333,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
     break;
   }
 
-  if (N->getTransformFn()) {
-    Explanation += Separator + "Has a transform function";
-    Separator = ", ";
-  }
-
-  if (!HasUnsupportedPredicate && !N->getTransformFn())
+  if (!HasUnsupportedPredicate)
     return Error::success();
 
   return failedImport(Explanation);
@@ -394,19 +407,42 @@ public:
   /// A bitfield of RecordFlagsBits flags.
   unsigned Flags;
 
+  /// The actual run-time value, if known
+  int64_t RawValue;
+
   MatchTableRecord(Optional<unsigned> LabelID_, StringRef EmitStr,
-                   unsigned NumElements, unsigned Flags)
+                   unsigned NumElements, unsigned Flags,
+                   int64_t RawValue = std::numeric_limits<int64_t>::min())
       : LabelID(LabelID_.hasValue() ? LabelID_.getValue() : ~0u),
-        EmitStr(EmitStr), NumElements(NumElements), Flags(Flags) {
+        EmitStr(EmitStr), NumElements(NumElements), Flags(Flags),
+        RawValue(RawValue) {
+
     assert((!LabelID_.hasValue() || LabelID != ~0u) &&
            "This value is reserved for non-labels");
   }
+  MatchTableRecord(const MatchTableRecord &Other) = default;
+  MatchTableRecord(MatchTableRecord &&Other) = default;
+
+  /// Useful if a Match Table Record gets optimized out
+  void turnIntoComment() {
+    Flags |= MTRF_Comment;
+    Flags &= ~MTRF_CommaFollows;
+    NumElements = 0;
+  }
+
+  /// For Jump Table generation purposes
+  bool operator<(const MatchTableRecord &Other) const {
+    return RawValue < Other.RawValue;
+  }
+  int64_t getRawValue() const { return RawValue; }
 
   void emit(raw_ostream &OS, bool LineBreakNextAfterThis,
             const MatchTable &Table) const;
   unsigned size() const { return NumElements; }
 };
 
+class Matcher;
+
 /// Holds the contents of a generated MatchTable to enable formatting and the
 /// necessary index tracking needed to support GIM_Try.
 class MatchTable {
@@ -419,10 +455,11 @@ class MatchTable {
   /// The currently defined labels.
   DenseMap<unsigned, unsigned> LabelMap;
   /// Tracks the sum of MatchTableRecord::NumElements as the table is built.
-  unsigned CurrentSize;
-
+  unsigned CurrentSize = 0;
   /// A unique identifier for a MatchTable label.
-  static unsigned CurrentLabelID;
+  unsigned CurrentLabelID = 0;
+  /// Determines if the table should be instrumented for rule coverage tracking.
+  bool IsWithCoverage;
 
 public:
   static MatchTableRecord LineBreak;
@@ -443,11 +480,20 @@ public:
     return MatchTableRecord(None, NamedValue, 1,
                             MatchTableRecord::MTRF_CommaFollows);
   }
+  static MatchTableRecord NamedValue(StringRef NamedValue, int64_t RawValue) {
+    return MatchTableRecord(None, NamedValue, 1,
+                            MatchTableRecord::MTRF_CommaFollows, RawValue);
+  }
   static MatchTableRecord NamedValue(StringRef Namespace,
                                      StringRef NamedValue) {
     return MatchTableRecord(None, (Namespace + "::" + NamedValue).str(), 1,
                             MatchTableRecord::MTRF_CommaFollows);
   }
+  static MatchTableRecord NamedValue(StringRef Namespace, StringRef NamedValue,
+                                     int64_t RawValue) {
+    return MatchTableRecord(None, (Namespace + "::" + NamedValue).str(), 1,
+                            MatchTableRecord::MTRF_CommaFollows, RawValue);
+  }
   static MatchTableRecord IntValue(int64_t IntValue) {
     return MatchTableRecord(None, llvm::to_string(IntValue), 1,
                             MatchTableRecord::MTRF_CommaFollows);
@@ -465,7 +511,12 @@ public:
                                 MatchTableRecord::MTRF_CommaFollows);
   }
 
-  MatchTable(unsigned ID) : ID(ID), CurrentSize(0) {}
+  static MatchTable buildTable(ArrayRef<Matcher *> Rules, bool WithCoverage);
+
+  MatchTable(bool WithCoverage, unsigned ID = 0)
+      : ID(ID), IsWithCoverage(WithCoverage) {}
+
+  bool isWithCoverage() const { return IsWithCoverage; }
 
   void push_back(const MatchTableRecord &Value) {
     if (Value.Flags & MatchTableRecord::MTRF_Label)
@@ -474,7 +525,7 @@ public:
     CurrentSize += Value.size();
   }
 
-  unsigned allocateLabelID() const { return CurrentLabelID++; }
+  unsigned allocateLabelID() { return CurrentLabelID++; }
 
   void defineLabel(unsigned LabelID) {
     LabelMap.insert(std::make_pair(LabelID, CurrentSize));
@@ -519,8 +570,6 @@ public:
   }
 };
 
-unsigned MatchTable::CurrentLabelID = 0;
-
 MatchTableRecord MatchTable::LineBreak = {
     None, "" /* Emit String */, 0 /* Elements */,
     MatchTableRecord::MTRF_LineBreakFollows};
@@ -573,65 +622,172 @@ class RuleMatcher;
 class Matcher {
 public:
   virtual ~Matcher() = default;
+  virtual void optimize() {}
   virtual void emit(MatchTable &Table) = 0;
-  virtual std::unique_ptr<PredicateMatcher> forgetFirstCondition() = 0;
+
+  virtual bool hasFirstCondition() const = 0;
+  virtual const PredicateMatcher &getFirstCondition() const = 0;
+  virtual std::unique_ptr<PredicateMatcher> popFirstCondition() = 0;
 };
 
-class GroupMatcher : public Matcher {
-  SmallVector<std::unique_ptr<PredicateMatcher>, 8> Conditions;
-  SmallVector<Matcher *, 8> Rules;
+MatchTable MatchTable::buildTable(ArrayRef<Matcher *> Rules,
+                                  bool WithCoverage) {
+  MatchTable Table(WithCoverage);
+  for (Matcher *Rule : Rules)
+    Rule->emit(Table);
+
+  return Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
+}
+
+class GroupMatcher final : public Matcher {
+  /// Conditions that form a common prefix of all the matchers contained.
+  SmallVector<std::unique_ptr<PredicateMatcher>, 1> Conditions;
+
+  /// All the nested matchers, sharing a common prefix.
+  std::vector<Matcher *> Matchers;
+
+  /// An owning collection for any auxiliary matchers created while optimizing
+  /// nested matchers contained.
+  std::vector<std::unique_ptr<Matcher>> MatcherStorage;
 
 public:
-  void addCondition(std::unique_ptr<PredicateMatcher> &&Predicate) {
-    Conditions.emplace_back(std::move(Predicate));
+  /// Add a matcher to the collection of nested matchers if it meets the
+  /// requirements, and return true. If it doesn't, do nothing and return false.
+  ///
+  /// Expected to preserve its argument, so it could be moved out later on.
+  bool addMatcher(Matcher &Candidate);
+
+  /// Mark the matcher as fully-built and ensure any invariants expected by both
+  /// optimize() and emit(...) methods. Generally, both sequences of calls
+  /// are expected to lead to a sensible result:
+  ///
+  /// addMatcher(...)*; finalize(); optimize(); emit(...); and
+  /// addMatcher(...)*; finalize(); emit(...);
+  ///
+  /// or generally
+  ///
+  /// addMatcher(...)*; finalize(); { optimize()*; emit(...); }*
+  ///
+  /// Multiple calls to optimize() are expected to be handled gracefully, though
+  /// optimize() is not expected to be idempotent. Multiple calls to finalize()
+  /// aren't generally supported. emit(...) is expected to be non-mutating and
+  /// to produce the exact same results upon repeated calls.
+  ///
+  /// addMatcher() calls after the finalize() call are not supported.
+  ///
+  /// finalize() and optimize() are both allowed to mutate the contained
+  /// matchers, so moving them out after finalize() is not supported.
+  void finalize();
+  void optimize() override;
+  void emit(MatchTable &Table) override;
+
+  /// Could be used to move out the matchers added previously, unless finalize()
+  /// has already been called. If any of the matchers are moved out, the group
+  /// becomes safe to destroy, but not safe to re-use for anything else.
+  iterator_range<std::vector<Matcher *>::iterator> matchers() {
+    return make_range(Matchers.begin(), Matchers.end());
   }
-  void addRule(Matcher &Rule) { Rules.push_back(&Rule); }
-  const std::unique_ptr<PredicateMatcher> &conditions_back() const {
-    return Conditions.back();
+  size_t size() const { return Matchers.size(); }
+  bool empty() const { return Matchers.empty(); }
+
+  std::unique_ptr<PredicateMatcher> popFirstCondition() override {
+    assert(!Conditions.empty() &&
+           "Trying to pop a condition from a condition-less group");
+    std::unique_ptr<PredicateMatcher> P = std::move(Conditions.front());
+    Conditions.erase(Conditions.begin());
+    return P;
   }
-  bool lastConditionMatches(const PredicateMatcher &Predicate) const;
-  bool conditions_empty() const { return Conditions.empty(); }
-  void clear() {
-    Conditions.clear();
-    Rules.clear();
+  const PredicateMatcher &getFirstCondition() const override {
+    assert(!Conditions.empty() &&
+           "Trying to get a condition from a condition-less group");
+    return *Conditions.front();
   }
+  bool hasFirstCondition() const override { return !Conditions.empty(); }
+
+private:
+  /// See if a candidate matcher could be added to this group solely by
+  /// analyzing its first condition.
+  bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
+};
+
+class SwitchMatcher : public Matcher {
+  /// All the nested matchers, representing distinct switch-cases. The first
+  /// conditions (as Matcher::getFirstCondition() reports) of all the nested
+  /// matchers must share the same type and path to a value they check, in other
+  /// words, be isIdenticalDownToValue, but have different values they check
+  /// against.
+  std::vector<Matcher *> Matchers;
+
+  /// The representative condition, with a type and a path (InsnVarID and OpIdx
+  /// in most cases) shared by all the matchers contained.
+  std::unique_ptr<PredicateMatcher> Condition = nullptr;
+
+  /// Temporary set used to check that the case values don't repeat within the
+  /// same switch.
+  std::set<MatchTableRecord> Values;
+
+  /// An owning collection for any auxiliary matchers created while optimizing
+  /// nested matchers contained.
+  std::vector<std::unique_ptr<Matcher>> MatcherStorage;
+
+public:
+  bool addMatcher(Matcher &Candidate);
+
+  void finalize();
   void emit(MatchTable &Table) override;
 
-  std::unique_ptr<PredicateMatcher> forgetFirstCondition() override {
-    // We shouldn't need to mess up with groups, since we
-    // should have merged everything shareable upfront.
-    // If we start to look into reordering predicates,
-    // we may want to reconsider this.
-    assert(0 && "Groups should be formed maximal for now");
-    llvm_unreachable("No need for this for now");
+  iterator_range<std::vector<Matcher *>::iterator> matchers() {
+    return make_range(Matchers.begin(), Matchers.end());
   }
+  size_t size() const { return Matchers.size(); }
+  bool empty() const { return Matchers.empty(); }
+
+  std::unique_ptr<PredicateMatcher> popFirstCondition() override {
+    // SwitchMatcher doesn't have a common first condition for its cases, as all
+    // the cases only share a kind of a value (a type and a path to it) they
+    // match, but deliberately differ in the actual value they match.
+    llvm_unreachable("Trying to pop a condition from a condition-less group");
+  }
+  const PredicateMatcher &getFirstCondition() const override {
+    llvm_unreachable("Trying to pop a condition from a condition-less group");
+  }
+  bool hasFirstCondition() const override { return false; }
+
+private:
+  /// See if the predicate type has a Switch-implementation for it.
+  static bool isSupportedPredicateType(const PredicateMatcher &Predicate);
+
+  bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
+
+  /// emit()-helper
+  static void emitPredicateSpecificOpcodes(const PredicateMatcher &P,
+                                           MatchTable &Table);
 };
 
 /// Generates code to check that a match rule matches.
 class RuleMatcher : public Matcher {
 public:
-  using ActionVec = std::vector<std::unique_ptr<MatchAction>>;
-  using action_iterator = ActionVec::iterator;
+  using ActionList = std::list<std::unique_ptr<MatchAction>>;
+  using action_iterator = ActionList::iterator;
 
 protected:
   /// A list of matchers that all need to succeed for the current rule to match.
   /// FIXME: This currently supports a single match position but could be
   /// extended to support multiple positions to support div/rem fusion or
   /// load-multiple instructions.
-  std::vector<std::unique_ptr<InstructionMatcher>> Matchers;
+  using MatchersTy = std::vector<std::unique_ptr<InstructionMatcher>> ;
+  MatchersTy Matchers;
 
   /// A list of actions that need to be taken when all predicates in this rule
   /// have succeeded.
-  ActionVec Actions;
+  ActionList Actions;
 
-  using DefinedInsnVariablesMap =
-      std::map<const InstructionMatcher *, unsigned>;
+  using DefinedInsnVariablesMap = std::map<InstructionMatcher *, unsigned>;
 
-  /// A map of instruction matchers to the local variables created by
-  /// emitCaptureOpcodes().
+  /// A map of instruction matchers to the IDs of their local variables.
   DefinedInsnVariablesMap InsnVariableIDs;
 
-  using MutatableInsnSet = SmallPtrSet<const InstructionMatcher *, 4>;
+  using MutatableInsnSet = SmallPtrSet<InstructionMatcher *, 4>;
 
   // The set of instruction matchers that have not yet been claimed for mutation
   // by a BuildMI.
@@ -641,7 +797,7 @@ protected:
   /// the renderers.
   StringMap<OperandMatcher *> DefinedOperands;
 
-  /// ID for the next instruction variable defined with defineInsnVar()
+  /// ID for the next instruction variable defined with implicitlyDefineInsnVar()
   unsigned NextInsnVarID;
 
   /// ID for the next output instruction allocated with allocateOutputInsnID()
@@ -651,6 +807,7 @@ protected:
   unsigned NextTempRegID;
 
   std::vector<Record *> RequiredFeatures;
+  std::vector<std::unique_ptr<PredicateMatcher>> EpilogueMatchers;
 
   ArrayRef<SMLoc> SrcLoc;
 
@@ -684,16 +841,9 @@ public:
   action_iterator insertAction(action_iterator InsertPt, Args &&... args);
 
   /// Define an instruction without emitting any code to do so.
-  /// This is used for the root of the match.
-  unsigned implicitlyDefineInsnVar(const InstructionMatcher &Matcher);
-  void clearImplicitMap() {
-    NextInsnVarID = 0;
-    InsnVariableIDs.clear();
-  };
-  /// Define an instruction and emit corresponding state-machine opcodes.
-  unsigned defineInsnVar(MatchTable &Table, const InstructionMatcher &Matcher,
-                         unsigned InsnVarID, unsigned OpIdx);
-  unsigned getInsnVarID(const InstructionMatcher &InsnMatcher) const;
+  unsigned implicitlyDefineInsnVar(InstructionMatcher &Matcher);
+
+  unsigned getInsnVarID(InstructionMatcher &InsnMatcher) const;
   DefinedInsnVariablesMap::const_iterator defined_insn_vars_begin() const {
     return InsnVariableIDs.begin();
   }
@@ -715,7 +865,7 @@ public:
   mutatable_insns() const {
     return make_range(mutatable_insns_begin(), mutatable_insns_end());
   }
-  void reserveInsnMatcherForMutation(const InstructionMatcher *InsnMatcher) {
+  void reserveInsnMatcherForMutation(InstructionMatcher *InsnMatcher) {
     bool R = MutatableInsns.erase(InsnMatcher);
     assert(R && "Reserving a mutatable insn that isn't available");
     (void)R;
@@ -743,11 +893,10 @@ public:
     return I->second;
   }
 
-  const InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
+  InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
   const OperandMatcher &getOperandMatcher(StringRef Name) const;
 
-  void emitCaptureOpcodes(MatchTable &Table);
-
+  void optimize() override;
   void emit(MatchTable &Table) override;
 
   /// Compare the priority of this object and B.
@@ -759,7 +908,12 @@ public:
   /// matcher.
   unsigned countRendererFns() const;
 
-  std::unique_ptr<PredicateMatcher> forgetFirstCondition() override;
+  std::unique_ptr<PredicateMatcher> popFirstCondition() override;
+  const PredicateMatcher &getFirstCondition() const override;
+  LLTCodeGen getFirstConditionAsRootType();
+  bool hasFirstCondition() const override;
+  unsigned getNumOperands() const;
+  StringRef getOpcode() const;
 
   // FIXME: Remove this as soon as possible
   InstructionMatcher &insnmatchers_front() const { return *Matchers.front(); }
@@ -767,6 +921,9 @@ public:
   unsigned allocateOutputInsnID() { return NextOutputInsnID++; }
   unsigned allocateTempRegID() { return NextTempRegID++; }
 
+  iterator_range<MatchersTy::iterator> insnmatchers() {
+    return make_range(Matchers.begin(), Matchers.end());
+  }
   bool insnmatchers_empty() const { return Matchers.empty(); }
   void insnmatchers_pop_front() { Matchers.erase(Matchers.begin()); }
 };
@@ -777,58 +934,69 @@ using action_iterator = RuleMatcher::action_iterator;
 
 template <class PredicateTy> class PredicateListMatcher {
 private:
-  typedef std::vector<std::unique_ptr<PredicateTy>> PredicateVec;
-  PredicateVec Predicates;
-
   /// Template instantiations should specialize this to return a string to use
   /// for the comment emitted when there are no predicates.
   std::string getNoPredicateComment() const;
 
+protected:
+  using PredicatesTy = std::deque<std::unique_ptr<PredicateTy>>;
+  PredicatesTy Predicates;
+
+  /// Track if the list of predicates was manipulated by one of the optimization
+  /// methods.
+  bool Optimized = false;
+
 public:
-  /// Construct a new operand predicate and add it to the matcher.
+  /// Construct a new predicate and add it to the matcher.
   template <class Kind, class... Args>
-  Optional<Kind *> addPredicate(Args&&... args) {
-    Predicates.emplace_back(
-        llvm::make_unique<Kind>(std::forward<Args>(args)...));
-    return static_cast<Kind *>(Predicates.back().get());
-  }
+  Optional<Kind *> addPredicate(Args &&... args);
 
-  typename PredicateVec::const_iterator predicates_begin() const {
+  typename PredicatesTy::iterator predicates_begin() {
     return Predicates.begin();
   }
-  typename PredicateVec::const_iterator predicates_end() const {
+  typename PredicatesTy::iterator predicates_end() {
     return Predicates.end();
   }
-  iterator_range<typename PredicateVec::const_iterator> predicates() const {
+  iterator_range<typename PredicatesTy::iterator> predicates() {
     return make_range(predicates_begin(), predicates_end());
   }
-  typename PredicateVec::size_type predicates_size() const {
+  typename PredicatesTy::size_type predicates_size() const {
     return Predicates.size();
   }
   bool predicates_empty() const { return Predicates.empty(); }
 
   std::unique_ptr<PredicateTy> predicates_pop_front() {
     std::unique_ptr<PredicateTy> Front = std::move(Predicates.front());
-    Predicates.erase(Predicates.begin());
+    Predicates.pop_front();
+    Optimized = true;
     return Front;
   }
 
+  void prependPredicate(std::unique_ptr<PredicateTy> &&Predicate) {
+    Predicates.push_front(std::move(Predicate));
+  }
+
+  void eraseNullPredicates() {
+    const auto NewEnd =
+        std::stable_partition(Predicates.begin(), Predicates.end(),
+                              std::logical_not<std::unique_ptr<PredicateTy>>());
+    if (NewEnd != Predicates.begin()) {
+      Predicates.erase(Predicates.begin(), NewEnd);
+      Optimized = true;
+    }
+  }
+
   /// Emit MatchTable opcodes that tests whether all the predicates are met.
   template <class... Args>
-  void emitPredicateListOpcodes(MatchTable &Table, Args &&... args) const {
-    if (Predicates.empty()) {
+  void emitPredicateListOpcodes(MatchTable &Table, Args &&... args) {
+    if (Predicates.empty() && !Optimized) {
       Table << MatchTable::Comment(getNoPredicateComment())
             << MatchTable::LineBreak;
       return;
     }
 
-    unsigned OpIdx = (*predicates_begin())->getOpIdx();
-    (void)OpIdx;
-    for (const auto &Predicate : predicates()) {
-      assert(Predicate->getOpIdx() == OpIdx &&
-             "Checks touch different operands?");
+    for (const auto &Predicate : predicates())
       Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...);
-    }
   }
 };
 
@@ -846,8 +1014,12 @@ public:
   /// are currently not compared between each other.
   enum PredicateKind {
     IPM_Opcode,
+    IPM_NumOperands,
     IPM_ImmPredicate,
     IPM_AtomicOrderingMMO,
+    IPM_MemoryLLTSize,
+    IPM_MemoryVsLLTSize,
+    IPM_GenericPredicate,
     OPM_SameOperand,
     OPM_ComplexPattern,
     OPM_IntrinsicID,
@@ -869,7 +1041,9 @@ public:
   PredicateMatcher(PredicateKind Kind, unsigned InsnVarID, unsigned OpIdx = ~0)
       : Kind(Kind), InsnVarID(InsnVarID), OpIdx(OpIdx) {}
 
+  unsigned getInsnVarID() const { return InsnVarID; }
   unsigned getOpIdx() const { return OpIdx; }
+
   virtual ~PredicateMatcher() = default;
   /// Emit MatchTable opcodes that check the predicate for the given operand.
   virtual void emitPredicateOpcodes(MatchTable &Table,
@@ -878,16 +1052,23 @@ public:
   PredicateKind getKind() const { return Kind; }
 
   virtual bool isIdentical(const PredicateMatcher &B) const {
-    if (InsnVarID != 0 || OpIdx != (unsigned)~0) {
-      // We currently don't hoist the record of instruction properly.
-      // Therefore we can only work on the orig instruction (InsnVarID
-      // == 0).
-      DEBUG(dbgs() << "Non-zero instr ID not supported yet\n");
-      return false;
-    }
     return B.getKind() == getKind() && InsnVarID == B.InsnVarID &&
            OpIdx == B.OpIdx;
   }
+
+  virtual bool isIdenticalDownToValue(const PredicateMatcher &B) const {
+    return hasValue() && PredicateMatcher::isIdentical(B);
+  }
+
+  virtual MatchTableRecord getValue() const {
+    assert(hasValue() && "Can not get a value of a value-less predicate!");
+    llvm_unreachable("Not implemented yet");
+  }
+  virtual bool hasValue() const { return false; }
+
+  /// Report the maximum number of temporary operands needed by the predicate
+  /// matcher.
+  virtual unsigned countRendererFns() const { return 0; }
 };
 
 /// Generates code to check a predicate of an operand.
@@ -903,20 +1084,10 @@ public:
       : PredicateMatcher(Kind, InsnVarID, OpIdx) {}
   virtual ~OperandPredicateMatcher() {}
 
-  /// Emit MatchTable opcodes to capture instructions into the MIs table.
-  ///
-  /// Only InstructionOperandMatcher needs to do anything for this method the
-  /// rest just walk the tree.
-  virtual void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const {}
-
   /// Compare the priority of this object and B.
   ///
   /// Returns true if this object is more important than B.
   virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const;
-
-  /// Report the maximum number of temporary operands needed by the predicate
-  /// matcher.
-  virtual unsigned countRendererFns() const { return 0; }
 };
 
 template <>
@@ -935,12 +1106,17 @@ public:
       : OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx),
         MatchingName(MatchingName) {}
 
-  static bool classof(const OperandPredicateMatcher *P) {
+  static bool classof(const PredicateMatcher *P) {
     return P->getKind() == OPM_SameOperand;
   }
 
   void emitPredicateOpcodes(MatchTable &Table,
                             RuleMatcher &Rule) const override;
+
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return OperandPredicateMatcher::isIdentical(B) &&
+           MatchingName == cast<SameOperandMatcher>(&B)->MatchingName;
+  }
 };
 
 /// Generates code to check that an operand is a particular LLT.
@@ -949,7 +1125,15 @@ protected:
   LLTCodeGen Ty;
 
 public:
-  static std::set<LLTCodeGen> KnownTypes;
+  static std::map<LLTCodeGen, unsigned> TypeIDValues;
+
+  static void initTypeIDValuesMap() {
+    TypeIDValues.clear();
+
+    unsigned ID = 0;
+    for (const LLTCodeGen LLTy : KnownTypes)
+      TypeIDValues[LLTy] = ID++;
+  }
 
   LLTOperandMatcher(unsigned InsnVarID, unsigned OpIdx, const LLTCodeGen &Ty)
       : OperandPredicateMatcher(OPM_LLT, InsnVarID, OpIdx), Ty(Ty) {
@@ -963,18 +1147,30 @@ public:
     return OperandPredicateMatcher::isIdentical(B) &&
            Ty == cast<LLTOperandMatcher>(&B)->Ty;
   }
+  MatchTableRecord getValue() const override {
+    const auto VI = TypeIDValues.find(Ty);
+    if (VI == TypeIDValues.end())
+      return MatchTable::NamedValue(getTy().getCxxEnumValue());
+    return MatchTable::NamedValue(getTy().getCxxEnumValue(), VI->second);
+  }
+  bool hasValue() const override {
+    if (TypeIDValues.size() != KnownTypes.size())
+      initTypeIDValuesMap();
+    return TypeIDValues.count(Ty);
+  }
+
+  LLTCodeGen getTy() const { return Ty; }
 
   void emitPredicateOpcodes(MatchTable &Table,
                             RuleMatcher &Rule) const override {
     Table << MatchTable::Opcode("GIM_CheckType") << MatchTable::Comment("MI")
           << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
           << MatchTable::IntValue(OpIdx) << MatchTable::Comment("Type")
-          << MatchTable::NamedValue(Ty.getCxxEnumValue())
-          << MatchTable::LineBreak;
+          << getValue() << MatchTable::LineBreak;
   }
 };
 
-std::set<LLTCodeGen> LLTOperandMatcher::KnownTypes;
+std::map<LLTCodeGen, unsigned> LLTOperandMatcher::TypeIDValues;
 
 /// Generates code to check that an operand is a pointer to any address space.
 ///
@@ -1207,7 +1403,18 @@ public:
     assert(SymbolicName.empty() && "Operand already has a symbolic name");
     SymbolicName = Name;
   }
-  unsigned getOperandIndex() const { return OpIdx; }
+
+  /// Construct a new operand predicate and add it to the matcher.
+  template <class Kind, class... Args>
+  Optional<Kind *> addPredicate(Args &&... args) {
+    if (isSameAsAnotherOperand())
+      return None;
+    Predicates.emplace_back(llvm::make_unique<Kind>(
+        getInsnVarID(), getOpIdx(), std::forward<Args>(args)...));
+    return static_cast<Kind *>(Predicates.back().get());
+  }
+
+  unsigned getOpIdx() const { return OpIdx; }
   unsigned getInsnVarID() const;
 
   std::string getOperandExpr(unsigned InsnVarID) const {
@@ -1220,23 +1427,19 @@ public:
   Error addTypeCheckPredicate(const TypeSetByHwMode &VTy,
                               bool OperandIsAPointer);
 
-  /// Emit MatchTable opcodes to capture instructions into the MIs table.
-  void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const {
-    for (const auto &Predicate : predicates())
-      Predicate->emitCaptureOpcodes(Table, Rule);
-  }
-
   /// Emit MatchTable opcodes that test whether the instruction named in
   /// InsnVarID matches all the predicates and all the operands.
-  void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) const {
-    std::string Comment;
-    raw_string_ostream CommentOS(Comment);
-    CommentOS << "MIs[" << getInsnVarID() << "] ";
-    if (SymbolicName.empty())
-      CommentOS << "Operand " << OpIdx;
-    else
-      CommentOS << SymbolicName;
-    Table << MatchTable::Comment(CommentOS.str()) << MatchTable::LineBreak;
+  void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) {
+    if (!Optimized) {
+      std::string Comment;
+      raw_string_ostream CommentOS(Comment);
+      CommentOS << "MIs[" << getInsnVarID() << "] ";
+      if (SymbolicName.empty())
+        CommentOS << "Operand " << OpIdx;
+      else
+        CommentOS << SymbolicName;
+      Table << MatchTable::Comment(CommentOS.str()) << MatchTable::LineBreak;
+    }
 
     emitPredicateListOpcodes(Table, Rule);
   }
@@ -1244,7 +1447,7 @@ public:
   /// Compare the priority of this object and B.
   ///
   /// Returns true if this object is more important than B.
-  bool isHigherPriorityThan(const OperandMatcher &B) const {
+  bool isHigherPriorityThan(OperandMatcher &B) {
     // Operand matchers involving more predicates have higher priority.
     if (predicates_size() > B.predicates_size())
       return true;
@@ -1252,7 +1455,7 @@ public:
       return false;
 
     // This assumes that predicates are added in a consistent order.
-    for (const auto &Predicate : zip(predicates(), B.predicates())) {
+    for (auto &&Predicate : zip(predicates(), B.predicates())) {
       if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate)))
         return true;
       if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate)))
@@ -1264,7 +1467,7 @@ public:
 
   /// Report the maximum number of temporary operands needed by the operand
   /// matcher.
-  unsigned countRendererFns() const {
+  unsigned countRendererFns() {
     return std::accumulate(
         predicates().begin(), predicates().end(), 0,
         [](unsigned A,
@@ -1277,7 +1480,7 @@ public:
     return AllocatedTemporariesBaseID;
   }
 
-  bool isSameAsAnotherOperand() const {
+  bool isSameAsAnotherOperand() {
     for (const auto &Predicate : predicates())
       if (isa<SameOperandMatcher>(Predicate))
         return true;
@@ -1285,21 +1488,6 @@ public:
   }
 };
 
-// Specialize OperandMatcher::addPredicate() to refrain from adding redundant
-// predicates.
-template <>
-template <class Kind, class... Args>
-Optional<Kind *>
-PredicateListMatcher<OperandPredicateMatcher>::addPredicate(Args &&... args) {
-  auto *OpMatcher = static_cast<OperandMatcher *>(this);
-  if (static_cast<OperandMatcher *>(this)->isSameAsAnotherOperand())
-    return None;
-  Predicates.emplace_back(llvm::make_unique<Kind>(OpMatcher->getInsnVarID(),
-                                                  OpMatcher->getOperandIndex(),
-                                                  std::forward<Args>(args)...));
-  return static_cast<Kind *>(Predicates.back().get());
-}
-
 Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
                                             bool OperandIsAPointer) {
   if (!VTy.isMachineValueType())
@@ -1343,15 +1531,11 @@ public:
   isHigherPriorityThan(const InstructionPredicateMatcher &B) const {
     return Kind < B.Kind;
   };
-
-  /// Report the maximum number of temporary operands needed by the predicate
-  /// matcher.
-  virtual unsigned countRendererFns() const { return 0; }
 };
 
 template <>
 std::string
-PredicateListMatcher<InstructionPredicateMatcher>::getNoPredicateComment() const {
+PredicateListMatcher<PredicateMatcher>::getNoPredicateComment() const {
   return "No instruction predicates";
 }
 
@@ -1360,7 +1544,17 @@ class InstructionOpcodeMatcher : public InstructionPredicateMatcher {
 protected:
   const CodeGenInstruction *I;
 
+  static DenseMap<const CodeGenInstruction *, unsigned> OpcodeValues;
+
 public:
+  static void initOpcodeValuesMap(const CodeGenTarget &Target) {
+    OpcodeValues.clear();
+
+    unsigned OpcodeValue = 0;
+    for (const CodeGenInstruction *I : Target.getInstructionsByEnumValue())
+      OpcodeValues[I] = OpcodeValue++;
+  }
+
   InstructionOpcodeMatcher(unsigned InsnVarID, const CodeGenInstruction *I)
       : InstructionPredicateMatcher(IPM_Opcode, InsnVarID), I(I) {}
 
@@ -1372,12 +1566,19 @@ public:
     return InstructionPredicateMatcher::isIdentical(B) &&
            I == cast<InstructionOpcodeMatcher>(&B)->I;
   }
+  MatchTableRecord getValue() const override {
+    const auto VI = OpcodeValues.find(I);
+    if (VI != OpcodeValues.end())
+      return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
+                                    VI->second);
+    return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
+  }
+  bool hasValue() const override { return OpcodeValues.count(I); }
 
   void emitPredicateOpcodes(MatchTable &Table,
                             RuleMatcher &Rule) const override {
     Table << MatchTable::Opcode("GIM_CheckOpcode") << MatchTable::Comment("MI")
-          << MatchTable::IntValue(InsnVarID)
-          << MatchTable::NamedValue(I->Namespace, I->TheDef->getName())
+          << MatchTable::IntValue(InsnVarID) << getValue()
           << MatchTable::LineBreak;
   }
 
@@ -1404,6 +1605,42 @@ public:
   bool isConstantInstruction() const {
     return I->TheDef->getName() == "G_CONSTANT";
   }
+
+  StringRef getOpcode() const { return I->TheDef->getName(); }
+  unsigned getNumOperands() const { return I->Operands.size(); }
+
+  StringRef getOperandType(unsigned OpIdx) const {
+    return I->Operands[OpIdx].OperandType;
+  }
+};
+
+DenseMap<const CodeGenInstruction *, unsigned>
+    InstructionOpcodeMatcher::OpcodeValues;
+
+class InstructionNumOperandsMatcher final : public InstructionPredicateMatcher {
+  unsigned NumOperands = 0;
+
+public:
+  InstructionNumOperandsMatcher(unsigned InsnVarID, unsigned NumOperands)
+      : InstructionPredicateMatcher(IPM_NumOperands, InsnVarID),
+        NumOperands(NumOperands) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_NumOperands;
+  }
+
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return InstructionPredicateMatcher::isIdentical(B) &&
+           NumOperands == cast<InstructionNumOperandsMatcher>(&B)->NumOperands;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckNumOperands")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("Expected")
+          << MatchTable::IntValue(NumOperands) << MatchTable::LineBreak;
+  }
 };
 
 /// Generates code to check that this instruction is a constant whose value
@@ -1483,10 +1720,17 @@ public:
       : InstructionPredicateMatcher(IPM_AtomicOrderingMMO, InsnVarID),
         Order(Order), Comparator(Comparator) {}
 
-  static bool classof(const InstructionPredicateMatcher *P) {
+  static bool classof(const PredicateMatcher *P) {
     return P->getKind() == IPM_AtomicOrderingMMO;
   }
 
+  bool isIdentical(const PredicateMatcher &B) const override {
+    if (!InstructionPredicateMatcher::isIdentical(B))
+      return false;
+    const auto &R = *cast<AtomicOrderingMMOPredicateMatcher>(&B);
+    return Order == R.Order && Comparator == R.Comparator;
+  }
+
   void emitPredicateOpcodes(MatchTable &Table,
                             RuleMatcher &Rule) const override {
     StringRef Opcode = "GIM_CheckAtomicOrdering";
@@ -1503,14 +1747,113 @@ public:
   }
 };
 
+/// Generates code to check that the size of an MMO is exactly N bytes.
+class MemorySizePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+  unsigned MMOIdx;
+  uint64_t Size;
+
+public:
+  MemorySizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, unsigned Size)
+      : InstructionPredicateMatcher(IPM_MemoryLLTSize, InsnVarID),
+        MMOIdx(MMOIdx), Size(Size) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_MemoryLLTSize;
+  }
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return InstructionPredicateMatcher::isIdentical(B) &&
+           MMOIdx == cast<MemorySizePredicateMatcher>(&B)->MMOIdx &&
+           Size == cast<MemorySizePredicateMatcher>(&B)->Size;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckMemorySizeEqualTo")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+          << MatchTable::Comment("Size") << MatchTable::IntValue(Size)
+          << MatchTable::LineBreak;
+  }
+};
+
+/// Generates code to check that the size of an MMO is less than, equal to, or
+/// greater than that of a given LLT.
+class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
+public:
+  enum RelationKind {
+    GreaterThan,
+    EqualTo,
+    LessThan,
+  };
+
+protected:
+  unsigned MMOIdx;
+  RelationKind Relation;
+  unsigned OpIdx;
+
+public:
+  MemoryVsLLTSizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+                                  enum RelationKind Relation,
+                                  unsigned OpIdx)
+      : InstructionPredicateMatcher(IPM_MemoryVsLLTSize, InsnVarID),
+        MMOIdx(MMOIdx), Relation(Relation), OpIdx(OpIdx) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_MemoryVsLLTSize;
+  }
+  bool isIdentical(const PredicateMatcher &B) const override {
+    return InstructionPredicateMatcher::isIdentical(B) &&
+           MMOIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->MMOIdx &&
+           Relation == cast<MemoryVsLLTSizePredicateMatcher>(&B)->Relation &&
+           OpIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->OpIdx;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode(Relation == EqualTo
+                                    ? "GIM_CheckMemorySizeEqualToLLT"
+                                    : Relation == GreaterThan
+                                          ? "GIM_CheckMemorySizeGreaterThanLLT"
+                                          : "GIM_CheckMemorySizeLessThanLLT")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+          << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
+          << MatchTable::LineBreak;
+  }
+};
+
+/// Generates code to check an arbitrary C++ instruction predicate.
+class GenericInstructionPredicateMatcher : public InstructionPredicateMatcher {
+protected:
+  TreePredicateFn Predicate;
+
+public:
+  GenericInstructionPredicateMatcher(unsigned InsnVarID,
+                                     TreePredicateFn Predicate)
+      : InstructionPredicateMatcher(IPM_GenericPredicate, InsnVarID),
+        Predicate(Predicate) {}
+
+  static bool classof(const InstructionPredicateMatcher *P) {
+    return P->getKind() == IPM_GenericPredicate;
+  }
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckCxxInsnPredicate")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("FnId")
+          << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
+          << MatchTable::LineBreak;
+  }
+};
+
 /// Generates code to check that a set of predicates and operands match for a
 /// particular instruction.
 ///
 /// Typical predicates include:
 /// * Has a specific opcode.
 /// * Has an nsw/nuw flag or doesn't.
-class InstructionMatcher
-    : public PredicateListMatcher<InstructionPredicateMatcher> {
+class InstructionMatcher final : public PredicateListMatcher<PredicateMatcher> {
 protected:
   typedef std::vector<std::unique_ptr<OperandMatcher>> OperandVec;
 
@@ -1519,6 +1862,7 @@ protected:
   /// The operands to match. All rendered operands must be present even if the
   /// condition is always true.
   OperandVec Operands;
+  bool NumOperandsCheck = true;
 
   std::string SymbolicName;
   unsigned InsnVarID;
@@ -1531,9 +1875,17 @@ public:
     InsnVarID = Rule.implicitlyDefineInsnVar(*this);
   }
 
+  /// Construct a new instruction predicate and add it to the matcher.
+  template <class Kind, class... Args>
+  Optional<Kind *> addPredicate(Args &&... args) {
+    Predicates.emplace_back(
+        llvm::make_unique<Kind>(getInsnVarID(), std::forward<Args>(args)...));
+    return static_cast<Kind *>(Predicates.back().get());
+  }
+
   RuleMatcher &getRuleMatcher() const { return Rule; }
 
-  unsigned getVarID() const { return InsnVarID; }
+  unsigned getInsnVarID() const { return InsnVarID; }
 
   /// Add an operand to the matcher.
   OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName,
@@ -1549,7 +1901,7 @@ public:
   OperandMatcher &getOperand(unsigned OpIdx) {
     auto I = std::find_if(Operands.begin(), Operands.end(),
                           [&OpIdx](const std::unique_ptr<OperandMatcher> &X) {
-                            return X->getOperandIndex() == OpIdx;
+                            return X->getOpIdx() == OpIdx;
                           });
     if (I != Operands.end())
       return **I;
@@ -1572,21 +1924,17 @@ public:
 
   void pop_front() { Operands.erase(Operands.begin()); }
 
-  /// Emit MatchTable opcodes to check the shape of the match and capture
-  /// instructions into the MIs table.
-  void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) {
-    Table << MatchTable::Opcode("GIM_CheckNumOperands")
-          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
-          << MatchTable::Comment("Expected")
-          << MatchTable::IntValue(getNumOperands()) << MatchTable::LineBreak;
-    for (const auto &Operand : Operands)
-      Operand->emitCaptureOpcodes(Table, Rule);
-  }
+  void optimize();
 
   /// Emit MatchTable opcodes that test whether the instruction named in
   /// InsnVarName matches all the predicates and all the operands.
-  void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) const {
+  void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) {
+    if (NumOperandsCheck)
+      InstructionNumOperandsMatcher(InsnVarID, getNumOperands())
+          .emitPredicateOpcodes(Table, Rule);
+
     emitPredicateListOpcodes(Table, Rule);
+
     for (const auto &Operand : Operands)
       Operand->emitPredicateOpcodes(Table, Rule);
   }
@@ -1594,17 +1942,19 @@ public:
   /// Compare the priority of this object and B.
   ///
   /// Returns true if this object is more important than B.
-  bool isHigherPriorityThan(const InstructionMatcher &B) const {
+  bool isHigherPriorityThan(InstructionMatcher &B) {
     // Instruction matchers involving more operands have higher priority.
     if (Operands.size() > B.Operands.size())
       return true;
     if (Operands.size() < B.Operands.size())
       return false;
 
-    for (const auto &Predicate : zip(predicates(), B.predicates())) {
-      if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate)))
+    for (auto &&P : zip(predicates(), B.predicates())) {
+      auto L = static_cast<InstructionPredicateMatcher *>(std::get<0>(P).get());
+      auto R = static_cast<InstructionPredicateMatcher *>(std::get<1>(P).get());
+      if (L->isHigherPriorityThan(*R))
         return true;
-      if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate)))
+      if (R->isHigherPriorityThan(*L))
         return false;
     }
 
@@ -1620,13 +1970,13 @@ public:
 
   /// Report the maximum number of temporary operands needed by the instruction
   /// matcher.
-  unsigned countRendererFns() const {
-    return std::accumulate(predicates().begin(), predicates().end(), 0,
-                           [](unsigned A,
-                              const std::unique_ptr<InstructionPredicateMatcher>
-                                  &Predicate) {
-                             return A + Predicate->countRendererFns();
-                           }) +
+  unsigned countRendererFns() {
+    return std::accumulate(
+               predicates().begin(), predicates().end(), 0,
+               [](unsigned A,
+                  const std::unique_ptr<PredicateMatcher> &Predicate) {
+                 return A + Predicate->countRendererFns();
+               }) +
            std::accumulate(
                Operands.begin(), Operands.end(), 0,
                [](unsigned A, const std::unique_ptr<OperandMatcher> &Operand) {
@@ -1634,24 +1984,36 @@ public:
                });
   }
 
-  bool isConstantInstruction() const {
-    for (const auto &P : predicates())
-      if (const InstructionOpcodeMatcher *Opcode =
-              dyn_cast<InstructionOpcodeMatcher>(P.get()))
-        return Opcode->isConstantInstruction();
-    return false;
+  InstructionOpcodeMatcher &getOpcodeMatcher() {
+    for (auto &P : predicates())
+      if (auto *OpMatcher = dyn_cast<InstructionOpcodeMatcher>(P.get()))
+        return *OpMatcher;
+    llvm_unreachable("Didn't find an opcode matcher");
+  }
+
+  bool isConstantInstruction() {
+    return getOpcodeMatcher().isConstantInstruction();
   }
+
+  StringRef getOpcode() { return getOpcodeMatcher().getOpcode(); }
 };
 
-template <>
-template <class Kind, class... Args>
-Optional<Kind *>
-PredicateListMatcher<InstructionPredicateMatcher>::addPredicate(
-    Args &&... args) {
-  InstructionMatcher *InstMatcher = static_cast<InstructionMatcher *>(this);
-  Predicates.emplace_back(llvm::make_unique<Kind>(InstMatcher->getVarID(),
-                                                  std::forward<Args>(args)...));
-  return static_cast<Kind *>(Predicates.back().get());
+StringRef RuleMatcher::getOpcode() const {
+  return Matchers.front()->getOpcode();
+}
+
+unsigned RuleMatcher::getNumOperands() const {
+  return Matchers.front()->getNumOperands();
+}
+
+LLTCodeGen RuleMatcher::getFirstConditionAsRootType() {
+  InstructionMatcher &InsnMatcher = *Matchers.front();
+  if (!InsnMatcher.predicates_empty())
+    if (const auto *TM =
+            dyn_cast<LLTOperandMatcher>(&**InsnMatcher.predicates_begin()))
+      if (TM->getInsnVarID() == 0 && TM->getOpIdx() == 0)
+        return TM->getTy();
+  return {};
 }
 
 /// Generates code to check that the operand is a register defined by an
@@ -1679,21 +2041,73 @@ public:
 
   InstructionMatcher &getInsnMatcher() const { return *InsnMatcher; }
 
-  void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
-    unsigned InsnID =
-        Rule.defineInsnVar(Table, *InsnMatcher, InsnVarID, getOpIdx());
-    (void)InsnID;
-    assert(InsnMatcher->getVarID() == InsnID &&
-           "Mismatch between build and emit");
-    InsnMatcher->emitCaptureOpcodes(Table, Rule);
+  void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const {
+    const unsigned NewInsnVarID = InsnMatcher->getInsnVarID();
+    Table << MatchTable::Opcode("GIM_RecordInsn")
+          << MatchTable::Comment("DefineMI")
+          << MatchTable::IntValue(NewInsnVarID) << MatchTable::Comment("MI")
+          << MatchTable::IntValue(getInsnVarID())
+          << MatchTable::Comment("OpIdx") << MatchTable::IntValue(getOpIdx())
+          << MatchTable::Comment("MIs[" + llvm::to_string(NewInsnVarID) + "]")
+          << MatchTable::LineBreak;
   }
 
   void emitPredicateOpcodes(MatchTable &Table,
                             RuleMatcher &Rule) const override {
+    emitCaptureOpcodes(Table, Rule);
     InsnMatcher->emitPredicateOpcodes(Table, Rule);
   }
+
+  bool isHigherPriorityThan(const OperandPredicateMatcher &B) const override {
+    if (OperandPredicateMatcher::isHigherPriorityThan(B))
+      return true;
+    if (B.OperandPredicateMatcher::isHigherPriorityThan(*this))
+      return false;
+
+    if (const InstructionOperandMatcher *BP =
+            dyn_cast<InstructionOperandMatcher>(&B))
+      if (InsnMatcher->isHigherPriorityThan(*BP->InsnMatcher))
+        return true;
+    return false;
+  }
 };
 
+void InstructionMatcher::optimize() {
+  SmallVector<std::unique_ptr<PredicateMatcher>, 8> Stash;
+  const auto &OpcMatcher = getOpcodeMatcher();
+  // Stash predicates to hoist, starting with the current front predicate.
+  Stash.push_back(predicates_pop_front());
+  if (Stash.back().get() == &OpcMatcher) {
+    if (NumOperandsCheck && OpcMatcher.getNumOperands() < getNumOperands())
+      Stash.emplace_back(
+          new InstructionNumOperandsMatcher(InsnVarID, getNumOperands()));
+    NumOperandsCheck = false; // emitPredicateOpcodes() now skips its check.
+    // Move the first IntrinsicIDOperandMatcher of each operand into Stash.
+    for (auto &OM : Operands)
+      for (auto &OP : OM->predicates())
+        if (isa<IntrinsicIDOperandMatcher>(OP)) {
+          Stash.push_back(std::move(OP));
+          OM->eraseNullPredicates();
+          break;
+        }
+  }
+  // A nested instruction (InsnVarID > 0) loses all predicates on operand 0.
+  if (InsnVarID > 0) {
+    assert(!Operands.empty() && "Nested instruction is expected to def a vreg");
+    for (auto &OP : Operands[0]->predicates())
+      OP.reset();
+    Operands[0]->eraseNullPredicates();
+  }
+  for (auto &OM : Operands) { // Every LLTOperandMatcher is stashed as well.
+    for (auto &OP : OM->predicates())
+      if (isa<LLTOperandMatcher>(OP))
+        Stash.push_back(std::move(OP));
+    OM->eraseNullPredicates();
+  }
+  while (!Stash.empty()) // Drain Stash back-to-front into prependPredicate().
+    prependPredicate(Stash.pop_back_val());
+}
+
 //===- Actions ------------------------------------------------------------===//
 class OperandRenderer {
 public:
@@ -1706,7 +2120,8 @@ public:
     OR_Imm,
     OR_Register,
     OR_TempRegister,
-    OR_ComplexPattern
+    OR_ComplexPattern,
+    OR_Custom
   };
 
 protected:
@@ -1749,7 +2164,7 @@ public:
     Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
           << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
           << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
-          << MatchTable::IntValue(Operand.getOperandIndex())
+          << MatchTable::IntValue(Operand.getOpIdx())
           << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
   }
 };
@@ -1785,7 +2200,7 @@ public:
           << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
           << MatchTable::Comment("OldInsnID")
           << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
-          << MatchTable::IntValue(Operand.getOperandIndex())
+          << MatchTable::IntValue(Operand.getOpIdx())
           << MatchTable::NamedValue(
                  (ZeroRegisterDef->getValue("Namespace")
                       ? ZeroRegisterDef->getValueAsString("Namespace")
@@ -1816,7 +2231,7 @@ public:
   const StringRef getSymbolicName() const { return SymbolicName; }
 
   void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
-    const InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+    InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
     unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
     Table << MatchTable::Opcode(Signed ? "GIR_CopyConstantAsSImm"
                                        : "GIR_CopyConstantAsUImm")
@@ -1847,7 +2262,7 @@ public:
   const StringRef getSymbolicName() const { return SymbolicName; }
 
   void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
-    const InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+    InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
     unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
     Table << MatchTable::Opcode("GIR_CopyFConstantAsFPImm")
           << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
@@ -1887,7 +2302,7 @@ public:
           << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
           << MatchTable::Comment("OldInsnID")
           << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
-          << MatchTable::IntValue(Operand.getOperandIndex())
+          << MatchTable::IntValue(Operand.getOpIdx())
           << MatchTable::Comment("SubRegIdx")
           << MatchTable::IntValue(SubReg->EnumValue)
           << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
@@ -2018,6 +2433,37 @@ public:
   }
 };
 
+class CustomRenderer : public OperandRenderer {
+protected:
+  unsigned InsnID; ///< Value emitted for the "InsnID" table entry.
+  const Record &Renderer; ///< Record whose "RendererFn" string is emitted.
+  /// Symbolic name passed to RuleMatcher::getInstructionMatcher().
+  const std::string SymbolicName;
+
+public:
+  CustomRenderer(unsigned InsnID, const Record &Renderer,
+                 StringRef SymbolicName)
+      : OperandRenderer(OR_Custom), InsnID(InsnID), Renderer(Renderer),
+        SymbolicName(SymbolicName) {}
+
+  static bool classof(const OperandRenderer *R) {
+    return R->getKind() == OR_Custom;
+  }
+  /// Emits a GIR_CustomRenderer row that names GICR_<RendererFn>.
+  void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+    InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+    unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
+    Table << MatchTable::Opcode("GIR_CustomRenderer")
+          << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+          << MatchTable::Comment("OldInsnID")
+          << MatchTable::IntValue(OldInsnVarID)
+          << MatchTable::Comment("Renderer")
+          << MatchTable::NamedValue(
+                 "GICR_" + Renderer.getValueAsString("RendererFn").str())
+          << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+  }
+};
+
 /// An action taken when all Matcher predicates succeeded for a parent rule.
 ///
 /// Typical actions include:
@@ -2051,7 +2497,7 @@ class BuildMIAction : public MatchAction {
 private:
   unsigned InsnID;
   const CodeGenInstruction *I;
-  const InstructionMatcher *Matched;
+  InstructionMatcher *Matched;
   std::vector<std::unique_ptr<OperandRenderer>> OperandRenderers;
 
   /// True if the instruction can be built solely by mutating the opcode.
@@ -2066,7 +2512,7 @@ private:
       if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) {
         const OperandMatcher &OM = Rule.getOperandMatcher(Copy->getSymbolicName());
         if (Insn != &OM.getInstructionMatcher() ||
-            OM.getOperandIndex() != Renderer.index())
+            OM.getOpIdx() != Renderer.index())
           return false;
       } else
         return false;
@@ -2079,10 +2525,11 @@ public:
   BuildMIAction(unsigned InsnID, const CodeGenInstruction *I)
       : InsnID(InsnID), I(I), Matched(nullptr) {}
 
+  unsigned getInsnID() const { return InsnID; }
   const CodeGenInstruction *getCGI() const { return I; }
 
   void chooseInsnToMutate(RuleMatcher &Rule) {
-    for (const auto *MutateCandidate : Rule.mutatable_insns()) {
+    for (auto *MutateCandidate : Rule.mutatable_insns()) {
       if (canMutate(Rule, MutateCandidate)) {
         // Take the first one we're offered that we're able to mutate.
         Rule.reserveInsnMatcherForMutation(MutateCandidate);
@@ -2160,7 +2607,7 @@ public:
       std::vector<unsigned> MergeInsnIDs;
       for (const auto &IDMatcherPair : Rule.defined_insn_vars())
         MergeInsnIDs.push_back(IDMatcherPair.second);
-      std::sort(MergeInsnIDs.begin(), MergeInsnIDs.end());
+      llvm::sort(MergeInsnIDs.begin(), MergeInsnIDs.end());
       for (const auto &MergeInsnID : MergeInsnIDs)
         Table << MatchTable::IntValue(MergeInsnID);
       Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList")
@@ -2274,27 +2721,13 @@ action_iterator RuleMatcher::insertAction(action_iterator InsertPt,
                          llvm::make_unique<Kind>(std::forward<Args>(args)...));
 }
 
-unsigned
-RuleMatcher::implicitlyDefineInsnVar(const InstructionMatcher &Matcher) {
+unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) {
   unsigned NewInsnVarID = NextInsnVarID++;
   InsnVariableIDs[&Matcher] = NewInsnVarID;
   return NewInsnVarID;
 }
 
-unsigned RuleMatcher::defineInsnVar(MatchTable &Table,
-                                    const InstructionMatcher &Matcher,
-                                    unsigned InsnID, unsigned OpIdx) {
-  unsigned NewInsnVarID = implicitlyDefineInsnVar(Matcher);
-  Table << MatchTable::Opcode("GIM_RecordInsn")
-        << MatchTable::Comment("DefineMI") << MatchTable::IntValue(NewInsnVarID)
-        << MatchTable::Comment("MI") << MatchTable::IntValue(InsnID)
-        << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
-        << MatchTable::Comment("MIs[" + llvm::to_string(NewInsnVarID) + "]")
-        << MatchTable::LineBreak;
-  return NewInsnVarID;
-}
-
-unsigned RuleMatcher::getInsnVarID(const InstructionMatcher &InsnMatcher) const {
+unsigned RuleMatcher::getInsnVarID(InstructionMatcher &InsnMatcher) const {
   const auto &I = InsnVariableIDs.find(&InsnMatcher);
   if (I != InsnVariableIDs.end())
     return I->second;
@@ -2312,7 +2745,7 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
   OM.addPredicate<SameOperandMatcher>(OM.getSymbolicName());
 }
 
-const InstructionMatcher &
+InstructionMatcher &
 RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
   for (const auto &I : InsnVariableIDs)
     if (I.first->getSymbolicName() == SymbolicName)
@@ -2331,25 +2764,10 @@ RuleMatcher::getOperandMatcher(StringRef Name) const {
   return *I->second;
 }
 
-/// Emit MatchTable opcodes to check the shape of the match and capture
-/// instructions into local variables.
-void RuleMatcher::emitCaptureOpcodes(MatchTable &Table) {
-  assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
-  unsigned InsnVarID = implicitlyDefineInsnVar(*Matchers.front());
-  (void)InsnVarID;
-  assert(Matchers.front()->getVarID() == InsnVarID &&
-         "IDs differ between build and emit");
-  Matchers.front()->emitCaptureOpcodes(Table, *this);
-}
-
 void RuleMatcher::emit(MatchTable &Table) {
   if (Matchers.empty())
     llvm_unreachable("Unexpected empty matcher!");
 
-  // Reset the ID generation so that the emitted IDs match the ones
-  // we set while building the InstructionMatcher and such.
-  clearImplicitMap();
-
   // The representation supports rules that require multiple roots such as:
   //    %ptr(p0) = ...
   //    %elt0(s32) = G_LOAD %ptr
@@ -2363,7 +2781,9 @@ void RuleMatcher::emit(MatchTable &Table) {
 
   unsigned LabelID = Table.allocateLabelID();
   Table << MatchTable::Opcode("GIM_Try", +1)
-        << MatchTable::Comment("On fail goto") << MatchTable::JumpTarget(LabelID)
+        << MatchTable::Comment("On fail goto")
+        << MatchTable::JumpTarget(LabelID)
+        << MatchTable::Comment(("Rule ID " + Twine(RuleID) + " //").str())
         << MatchTable::LineBreak;
 
   if (!RequiredFeatures.empty()) {
@@ -2372,8 +2792,6 @@ void RuleMatcher::emit(MatchTable &Table) {
           << MatchTable::LineBreak;
   }
 
-  emitCaptureOpcodes(Table);
-
   Matchers.front()->emitPredicateOpcodes(Table, *this);
 
   // We must also check if it's safe to fold the matched instructions.
@@ -2388,7 +2806,7 @@ void RuleMatcher::emit(MatchTable &Table) {
 
       InsnIDs.push_back(Pair.second);
     }
-    std::sort(InsnIDs.begin(), InsnIDs.end());
+    llvm::sort(InsnIDs.begin(), InsnIDs.end());
 
     for (const auto &InsnID : InsnIDs) {
       // Reject the difficult cases until we have a more accurate check.
@@ -2433,15 +2851,22 @@ void RuleMatcher::emit(MatchTable &Table) {
     }
   }
 
+  for (const auto &PM : EpilogueMatchers)
+    PM->emitPredicateOpcodes(Table, *this);
+
   for (const auto &MA : Actions)
     MA->emitActionOpcodes(Table, *this);
 
-  if (GenerateCoverage)
+  if (Table.isWithCoverage())
     Table << MatchTable::Opcode("GIR_Coverage") << MatchTable::IntValue(RuleID)
           << MatchTable::LineBreak;
+  else
+    Table << MatchTable::Comment(("GIR_Coverage, " + Twine(RuleID) + ",").str())
+          << MatchTable::LineBreak;
 
   Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
         << MatchTable::Label(LabelID);
+  ++NumPatternEmitted;
 }
 
 bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
@@ -2505,7 +2930,7 @@ void SameOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
                                               RuleMatcher &Rule) const {
   const OperandMatcher &OtherOM = Rule.getOperandMatcher(MatchingName);
   unsigned OtherInsnVarID = Rule.getInsnVarID(OtherOM.getInstructionMatcher());
-  assert(OtherInsnVarID == OtherOM.getInstructionMatcher().getVarID());
+  assert(OtherInsnVarID == OtherOM.getInstructionMatcher().getInsnVarID());
 
   Table << MatchTable::Opcode("GIM_CheckIsSameOperand")
         << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
@@ -2513,7 +2938,7 @@ void SameOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
         << MatchTable::Comment("OtherMI")
         << MatchTable::IntValue(OtherInsnVarID)
         << MatchTable::Comment("OtherOpIdx")
-        << MatchTable::IntValue(OtherOM.getOperandIndex())
+        << MatchTable::IntValue(OtherOM.getOpIdx())
         << MatchTable::LineBreak;
 }
 
@@ -2541,25 +2966,43 @@ private:
   /// GIComplexPatternEquiv.
   DenseMap<const Record *, const Record *> ComplexPatternEquivs;
 
+  /// Keep track of the equivalence between SDNodeXForms and
+  /// GICustomOperandRenderers. Map entries are specified by subclassing
+  /// GISDNodeXFormEquiv.
+  DenseMap<const Record *, const Record *> SDNodeXFormEquivs;
+
+  /// Keep track of Scores of PatternsToMatch similar to how the DAG does.
+  /// This adds compatibility for RuleMatchers to use this for ordering rules.
+  DenseMap<uint64_t, int> RuleMatcherScores;
+
   // Map of predicates to their subtarget features.
   SubtargetFeatureInfoMap SubtargetFeatures;
 
   // Rule coverage information.
   Optional<CodeGenCoverage> RuleCoverage;
 
+  void gatherOpcodeValues();
+  void gatherTypeIDValues();
   void gatherNodeEquivs();
+  // Instruction predicate code that will be emitted in generated functions.
+  SmallVector<std::string, 2> InstructionPredicateCodes;
+  unsigned getOrCreateInstructionPredicateFnId(StringRef Code);
+
   Record *findNodeEquiv(Record *N) const;
+  const CodeGenInstruction *getEquivNode(Record &Equiv,
+                                         const TreePatternNode *N) const;
 
   Error importRulePredicates(RuleMatcher &M, ArrayRef<Predicate> Predicates);
-  Expected<InstructionMatcher &> createAndImportSelDAGMatcher(
-      RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
-      const TreePatternNode *Src, unsigned &TempOpIdx) const;
+  Expected<InstructionMatcher &>
+  createAndImportSelDAGMatcher(RuleMatcher &Rule,
+                               InstructionMatcher &InsnMatcher,
+                               const TreePatternNode *Src, unsigned &TempOpIdx);
   Error importComplexPatternOperandMatcher(OperandMatcher &OM, Record *R,
                                            unsigned &TempOpIdx) const;
   Error importChildMatcher(RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
                            const TreePatternNode *SrcChild,
                            bool OperandIsAPointer, unsigned OpIdx,
-                           unsigned &TempOpIdx) const;
+                           unsigned &TempOpIdx);
 
   Expected<BuildMIAction &>
   createAndImportInstructionRenderer(RuleMatcher &M,
@@ -2585,9 +3028,14 @@ private:
   importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
                              const std::vector<Record *> &ImplicitDefs) const;
 
-  void emitImmPredicates(raw_ostream &OS, StringRef TypeIdentifier,
-                         StringRef Type,
-                         std::function<bool(const Record *R)> Filter);
+  void emitCxxPredicateFns(raw_ostream &OS, StringRef CodeFieldName,
+                           StringRef TypeIdentifier, StringRef ArgType,
+                           StringRef ArgName, StringRef AdditionalDeclarations,
+                           std::function<bool(const Record *R)> Filter);
+  void emitImmPredicateFns(raw_ostream &OS, StringRef TypeIdentifier,
+                           StringRef ArgType,
+                           std::function<bool(const Record *R)> Filter);
+  void emitMIPredicateFns(raw_ostream &OS);
 
   /// Analyze pattern \p P, returning a matcher for it if possible.
   /// Otherwise, return an Error explaining why we don't support it.
@@ -2595,19 +3043,15 @@ private:
 
   void declareSubtargetFeature(Record *Predicate);
 
-  TreePatternNode *fixupPatternNode(TreePatternNode *N);
-  void fixupPatternTrees(TreePattern *P);
+  MatchTable buildMatchTable(MutableArrayRef<RuleMatcher> Rules, bool Optimize,
+                             bool WithCoverage);
 
+public:
   /// Takes a sequence of \p Rules and group them based on the predicates
-  /// they share. \p StorageGroupMatcher is used as a memory container
-  /// for the the group that are created as part of this process.
-  /// The optimization process does not change the relative order of
-  /// the rules. In particular, we don't try to share predicates if
-  /// that means reordering the rules (e.g., we won't group R1 and R3
-  /// in the following example as it would imply reordering R2 and R3
-  /// => R1 p1, R2 p2, R3 p1).
+  /// they share. \p MatcherStorage is used as a memory container
+  /// for the groups that are created as part of this process.
   ///
-  /// What this optimization does looks like:
+  /// What this optimization does looks like if GroupT = GroupMatcher:
   /// Output without optimization:
   /// \verbatim
   /// # R1
@@ -2628,11 +3072,34 @@ private:
   ///  # R2
   ///   # predicate C
   /// \endverbatim
-  std::vector<Matcher *> optimizeRules(
-      const std::vector<Matcher *> &Rules,
-      std::vector<std::unique_ptr<GroupMatcher>> &StorageGroupMatcher);
+  template <class GroupT>
+  static std::vector<Matcher *> optimizeRules(
+      ArrayRef<Matcher *> Rules,
+      std::vector<std::unique_ptr<Matcher>> &MatcherStorage);
 };
 
+void GlobalISelEmitter::gatherOpcodeValues() {
+  InstructionOpcodeMatcher::initOpcodeValuesMap(Target);
+}
+
+void GlobalISelEmitter::gatherTypeIDValues() {
+  LLTOperandMatcher::initTypeIDValuesMap();
+}
+unsigned GlobalISelEmitter::getOrCreateInstructionPredicateFnId(StringRef Code) {
+  // Returns the index of Code in InstructionPredicateCodes, appending it first
+  // if absent. Few predicates need this at the moment, so a set-like vector
+  // with a linear search suffices; revisit if the list grows.
+  const auto &I = std::find(InstructionPredicateCodes.begin(),
+                            InstructionPredicateCodes.end(),
+                            Code);
+  if (I == InstructionPredicateCodes.end()) {
+    unsigned ID = InstructionPredicateCodes.size();
+    InstructionPredicateCodes.push_back(Code);
+    return ID;
+  }
+  return std::distance(InstructionPredicateCodes.begin(), I);
+}
+
 void GlobalISelEmitter::gatherNodeEquivs() {
   assert(NodeEquivs.empty());
   for (Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv"))
@@ -2645,15 +3112,36 @@ void GlobalISelEmitter::gatherNodeEquivs() {
       continue;
     ComplexPatternEquivs[SelDAGEquiv] = Equiv;
  }
+
+ assert(SDNodeXFormEquivs.empty());
+ for (Record *Equiv : RK.getAllDerivedDefinitions("GISDNodeXFormEquiv")) {
+   Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
+   if (!SelDAGEquiv)
+     continue;
+   SDNodeXFormEquivs[SelDAGEquiv] = Equiv;
+ }
 }
 
 Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
   return NodeEquivs.lookup(N);
 }
 
+const CodeGenInstruction *
+GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
+  for (const auto &Predicate : N->getPredicateFns()) { // First match wins.
+    if (!Equiv.isValueUnset("IfSignExtend") && Predicate.isLoad() &&
+        Predicate.isSignExtLoad()) // Sign-extending load, override is set.
+      return &Target.getInstruction(Equiv.getValueAsDef("IfSignExtend"));
+    if (!Equiv.isValueUnset("IfZeroExtend") && Predicate.isLoad() &&
+        Predicate.isZeroExtLoad()) // Zero-extending load, override is set.
+      return &Target.getInstruction(Equiv.getValueAsDef("IfZeroExtend"));
+  }
+  return &Target.getInstruction(Equiv.getValueAsDef("I")); // Default "I".
+}
+
 GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK)
-    : RK(RK), CGP(RK, [&](TreePattern *P) { fixupPatternTrees(P); }),
-      Target(CGP.getTargetInfo()), CGRegs(RK, Target.getHwModes()) {}
+    : RK(RK), CGP(RK), Target(CGP.getTargetInfo()),
+      CGRegs(RK, Target.getHwModes()) {}
 
 //===- Emitter ------------------------------------------------------------===//
 
@@ -2672,7 +3160,7 @@ GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
 
 Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
     RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
-    const TreePatternNode *Src, unsigned &TempOpIdx) const {
+    const TreePatternNode *Src, unsigned &TempOpIdx) {
   Record *SrcGIEquivOrNull = nullptr;
   const CodeGenInstruction *SrcGIOrNull = nullptr;
 
@@ -2693,7 +3181,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
     if (!SrcGIEquivOrNull)
       return failedImport("Pattern operator lacks an equivalent Instruction" +
                           explainOperator(Src->getOperator()));
-    SrcGIOrNull = &Target.getInstruction(SrcGIEquivOrNull->getValueAsDef("I"));
+    SrcGIOrNull = getEquivNode(*SrcGIEquivOrNull, Src);
 
     // The operators look good: match the opcode
     InsnMatcher.addPredicate<InstructionOpcodeMatcher>(SrcGIOrNull);
@@ -2718,8 +3206,26 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       continue;
     }
 
-    // No check required. G_LOAD by itself is a non-extending load.
-    if (Predicate.isNonExtLoad())
+    // G_LOAD is used for both non-extending and any-extending loads.
+    if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
+      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+          0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
+      continue;
+    }
+    if (Predicate.isLoad() && Predicate.isAnyExtLoad()) {
+      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+          0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      continue;
+    }
+
+    // No check required. We already did it by swapping the opcode.
+    if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
+        Predicate.isSignExtLoad())
+      continue;
+
+    // No check required. We already did it by swapping the opcode.
+    if (!SrcGIEquivOrNull->isValueUnset("IfZeroExtend") &&
+        Predicate.isZeroExtLoad())
       continue;
 
     // No check required. G_STORE by itself is a non-extending store.
@@ -2734,8 +3240,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
         if (!MemTyOrNone)
           return failedImport("MemVT could not be converted to LLT");
 
-        OperandMatcher &OM = InsnMatcher.getOperand(0);
-        OM.addPredicate<LLTOperandMatcher>(MemTyOrNone.getValue());
+        // MMOs work in bytes, so we must make sure that unusual types like i1
+        // don't round down.
+        unsigned MemSizeInBits =
+            llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
+
+        InsnMatcher.addPredicate<MemorySizePredicateMatcher>(
+            0, MemSizeInBits / 8);
         continue;
       }
     }
@@ -2794,6 +3305,11 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       }
     }
 
+    if (Predicate.hasGISelPredicateCode()) {
+      InsnMatcher.addPredicate<GenericInstructionPredicateMatcher>(Predicate);
+      continue;
+    }
+
     return failedImport("Src pattern child has predicate (" +
                         explainPredicates(Src) + ")");
   }
@@ -2872,7 +3388,7 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
                                             const TreePatternNode *SrcChild,
                                             bool OperandIsAPointer,
                                             unsigned OpIdx,
-                                            unsigned &TempOpIdx) const {
+                                            unsigned &TempOpIdx) {
   OperandMatcher &OM =
       InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx);
   if (OM.isSameAsAnotherOperand())
@@ -2986,10 +3502,6 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
 Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
     action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
     TreePatternNode *DstChild) {
-  if (DstChild->getTransformFn() != nullptr) {
-    return failedImport("Dst pattern child has transform fn " +
-                        DstChild->getTransformFn()->getName());
-  }
 
   const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName());
   if (SubOperand.hasValue()) {
@@ -3000,6 +3512,18 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
   }
 
   if (!DstChild->isLeaf()) {
+
+    if (DstChild->getOperator()->isSubClassOf("SDNodeXForm")) {
+      auto Child = DstChild->getChild(0);
+      auto I = SDNodeXFormEquivs.find(DstChild->getOperator());
+      if (I != SDNodeXFormEquivs.end()) {
+        DstMIBuilder.addRenderer<CustomRenderer>(*I->second, Child->getName());
+        return InsertPt;
+      }
+      return failedImport("SDNodeXForm " + Child->getName() +
+                          " has no custom renderer");
+    }
+
     // We accept 'bb' here. It's an operator because BasicBlockSDNode isn't
     // inline, but in MI it's just another operand.
     if (DstChild->getOperator()->isSubClassOf("SDNode")) {
@@ -3104,10 +3628,6 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
       return InsertPt;
     }
 
-    if (ChildRec->isSubClassOf("SDNodeXForm"))
-      return failedImport("Dst pattern child def is an unsupported tablegen "
-                          "class (SDNodeXForm)");
-
     return failedImport(
         "Dst pattern child def is an unsupported tablegen class");
   }
@@ -3135,7 +3655,7 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
 
 Expected<action_iterator>
 GlobalISelEmitter::createAndImportSubInstructionRenderer(
-    action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
+    const action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
     unsigned TempRegID) {
   auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst);
 
@@ -3143,7 +3663,6 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
 
   if (auto Error = InsertPtOrError.takeError())
     return std::move(Error);
-  InsertPt = InsertPtOrError.get();
 
   BuildMIAction &DstMIBuilder =
       *static_cast<BuildMIAction *>(InsertPtOrError.get()->get());
@@ -3151,10 +3670,13 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
   // Assign the result to TempReg.
   DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true);
 
-  InsertPtOrError = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst);
+  InsertPtOrError =
+      importExplicitUseRenderers(InsertPtOrError.get(), M, DstMIBuilder, Dst);
   if (auto Error = InsertPtOrError.takeError())
     return std::move(Error);
 
+  M.insertAction<ConstrainOperandsToDefinitionAction>(InsertPt,
+                                                      DstMIBuilder.getInsnID());
   return InsertPtOrError.get();
 }
 
@@ -3311,7 +3833,9 @@ Error GlobalISelEmitter::importImplicitDefRenderers(
 
 Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
   // Keep track of the matchers and actions to emit.
+  int Score = P.getPatternComplexity(CGP);
   RuleMatcher M(P.getSrcRecord()->getLoc());
+  RuleMatcherScores[M.getRuleID()] = Score;
   M.addAction<DebugCommentAction>(llvm::to_string(*P.getSrcPattern()) +
                                   "  =>  " +
                                   llvm::to_string(*P.getDstPattern()));
@@ -3526,14 +4050,15 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
 // Emit imm predicate table and an enum to reference them with.
 // The 'Predicate_' part of the name is redundant but eliminating it is more
 // trouble than it's worth.
-void GlobalISelEmitter::emitImmPredicates(
-    raw_ostream &OS, StringRef TypeIdentifier, StringRef Type,
+void GlobalISelEmitter::emitCxxPredicateFns(
+    raw_ostream &OS, StringRef CodeFieldName, StringRef TypeIdentifier,
+    StringRef ArgType, StringRef ArgName, StringRef AdditionalDeclarations,
     std::function<bool(const Record *R)> Filter) {
   std::vector<const Record *> MatchedRecords;
   const auto &Defs = RK.getAllDerivedDefinitions("PatFrag");
   std::copy_if(Defs.begin(), Defs.end(), std::back_inserter(MatchedRecords),
                [&](Record *Record) {
-                 return !Record->getValueAsString("ImmediateCode").empty() &&
+                 return !Record->getValueAsString(CodeFieldName).empty() &&
                         Filter(Record);
                });
 
@@ -3550,16 +4075,20 @@ void GlobalISelEmitter::emitImmPredicates(
     OS << "};\n";
   }
 
-  OS << "bool " << Target.getName() << "InstructionSelector::testImmPredicate_"
-     << TypeIdentifier << "(unsigned PredicateID, " << Type
-     << " Imm) const {\n";
+  OS << "bool " << Target.getName() << "InstructionSelector::test" << ArgName
+     << "Predicate_" << TypeIdentifier << "(unsigned PredicateID, " << ArgType << " "
+     << ArgName << ") const {\n"
+     << AdditionalDeclarations;
+  if (!AdditionalDeclarations.empty())
+    OS << "\n";
   if (!MatchedRecords.empty())
     OS << "  switch (PredicateID) {\n";
   for (const auto *Record : MatchedRecords) {
     OS << "  case GIPFP_" << TypeIdentifier << "_Predicate_"
        << Record->getName() << ": {\n"
-       << "    " << Record->getValueAsString("ImmediateCode") << "\n"
-       << "    llvm_unreachable(\"ImmediateCode should have returned\");\n"
+       << "    " << Record->getValueAsString(CodeFieldName) << "\n"
+       << "    llvm_unreachable(\"" << CodeFieldName
+       << " should have returned\");\n"
        << "    return false;\n"
        << "  }\n";
   }
@@ -3570,38 +4099,144 @@ void GlobalISelEmitter::emitImmPredicates(
      << "}\n";
 }
 
+void GlobalISelEmitter::emitImmPredicateFns(
+    raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType,
+    std::function<bool(const Record *R)> Filter) {
+  return emitCxxPredicateFns(OS, "ImmediateCode", TypeIdentifier, ArgType,
+                             "Imm", "", Filter);
+}
+
+void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) {
+  return emitCxxPredicateFns(
+      OS, "GISelPredicateCode", "MI", "const MachineInstr &", "MI",
+      "  const MachineFunction &MF = *MI.getParent()->getParent();\n"
+      "  const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
+      "  (void)MRI;",
+      [](const Record *R) { return true; });
+}
+
+template <class GroupT>
 std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
-    const std::vector<Matcher *> &Rules,
-    std::vector<std::unique_ptr<GroupMatcher>> &StorageGroupMatcher) {
+    ArrayRef<Matcher *> Rules,
+    std::vector<std::unique_ptr<Matcher>> &MatcherStorage) {
+
   std::vector<Matcher *> OptRules;
-  // Start with a stupid grouping for now.
-  std::unique_ptr<GroupMatcher> CurrentGroup = make_unique<GroupMatcher>();
-  assert(CurrentGroup->conditions_empty());
-  unsigned NbGroup = 0;
-  for (Matcher *Rule : Rules) {
-    std::unique_ptr<PredicateMatcher> Predicate = Rule->forgetFirstCondition();
-    if (!CurrentGroup->conditions_empty() &&
-        !CurrentGroup->lastConditionMatches(*Predicate)) {
-      // Start a new group.
-      ++NbGroup;
+  std::unique_ptr<GroupT> CurrentGroup = make_unique<GroupT>();
+  assert(CurrentGroup->empty() && "Newly created group isn't empty!");
+  unsigned NumGroups = 0; // groups actually emitted (debug output only)
+
+  auto ProcessCurrentGroup = [&]() {
+    if (CurrentGroup->empty())
+      // An empty group is good to be reused:
+      return;
+
+    // If the group isn't large enough to provide any benefit, move all the
+    // added rules out of it and make sure to re-create the group to properly
+    // re-initialize it:
+    if (CurrentGroup->size() < 2)
+      for (Matcher *M : CurrentGroup->matchers())
+        OptRules.push_back(M);
+    else {
+      CurrentGroup->finalize();
       OptRules.push_back(CurrentGroup.get());
-      StorageGroupMatcher.emplace_back(std::move(CurrentGroup));
-      CurrentGroup = make_unique<GroupMatcher>();
-      assert(CurrentGroup->conditions_empty());
+      MatcherStorage.emplace_back(std::move(CurrentGroup));
+      ++NumGroups;
     }
-    if (CurrentGroup->conditions_empty())
-      CurrentGroup->addCondition(std::move(Predicate));
-    CurrentGroup->addRule(*Rule);
-  }
-  if (!CurrentGroup->conditions_empty()) {
-    ++NbGroup;
-    OptRules.push_back(CurrentGroup.get());
-    StorageGroupMatcher.emplace_back(std::move(CurrentGroup));
+    CurrentGroup = make_unique<GroupT>();
+  };
+  for (Matcher *Rule : Rules) {
+    // Greedily add as many matchers as possible to the current group:
+    if (CurrentGroup->addMatcher(*Rule))
+      continue;
+
+    ProcessCurrentGroup();
+    assert(CurrentGroup->empty() && "A group wasn't properly re-initialized");
+
+    // Try to add the pending matcher to a newly created empty group:
+    if (!CurrentGroup->addMatcher(*Rule))
+      // If we couldn't add the matcher to an empty group, that group type
+      // doesn't support that kind of matcher at all, so keep it ungrouped:
+      OptRules.push_back(Rule);
   }
-  DEBUG(dbgs() << "NbGroup: " << NbGroup << "\n");
+  ProcessCurrentGroup();
+
+  LLVM_DEBUG(dbgs() << "NumGroups: " << NumGroups << "\n");
+  assert(CurrentGroup->empty() && "The last group wasn't properly processed");
   return OptRules;
 }
 
+MatchTable
+GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
+                                   bool Optimize, bool WithCoverage) {
+  std::vector<Matcher *> InputRules;
+  for (Matcher &Rule : Rules)
+    InputRules.push_back(&Rule);
+
+  if (!Optimize) // skip the sorting and grouping below
+    return MatchTable::buildTable(InputRules, WithCoverage);
+
+  unsigned CurrentOrdering = 0; // next rank for an unseen opcode
+  StringMap<unsigned> OpcodeOrder;
+  for (RuleMatcher &Rule : Rules) {
+    const StringRef Opcode = Rule.getOpcode();
+    assert(!Opcode.empty() && "Didn't expect an undefined opcode");
+    if (OpcodeOrder.count(Opcode) == 0)
+      OpcodeOrder[Opcode] = CurrentOrdering++; // rank by first appearance
+  }
+
+  std::stable_sort(InputRules.begin(), InputRules.end(),
+                   [&OpcodeOrder](const Matcher *A, const Matcher *B) {
+                     auto *L = static_cast<const RuleMatcher *>(A);
+                     auto *R = static_cast<const RuleMatcher *>(B);
+                     return std::make_tuple(OpcodeOrder[L->getOpcode()],
+                                            L->getNumOperands()) <
+                            std::make_tuple(OpcodeOrder[R->getOpcode()],
+                                            R->getNumOperands());
+                   }); // key: opcode rank, then operand count
+
+  for (Matcher *Rule : InputRules)
+    Rule->optimize();
+
+  std::vector<std::unique_ptr<Matcher>> MatcherStorage; // owns new groups
+  std::vector<Matcher *> OptRules =
+      optimizeRules<GroupMatcher>(InputRules, MatcherStorage);
+
+  for (Matcher *Rule : OptRules)
+    Rule->optimize();
+
+  OptRules = optimizeRules<SwitchMatcher>(OptRules, MatcherStorage);
+
+  return MatchTable::buildTable(OptRules, WithCoverage);
+}
+
+void GroupMatcher::optimize() {
+  // Make sure we only sort by a specific predicate within a range of rules that
+  // all have that predicate checked against a specific value (not a wildcard):
+  auto F = Matchers.begin(); // first rule of the current sortable run
+  auto T = F; // scans forward to the end of that run
+  auto E = Matchers.end();
+  while (T != E) {
+    while (T != E) {
+      auto *R = static_cast<RuleMatcher *>(*T);
+      if (!R->getFirstConditionAsRootType().get().isValid())
+        break; // root type not valid: the run ends at T
+      ++T;
+    }
+    std::stable_sort(F, T, [](Matcher *A, Matcher *B) {
+      auto *L = static_cast<RuleMatcher *>(A);
+      auto *R = static_cast<RuleMatcher *>(B);
+      return L->getFirstConditionAsRootType() <
+             R->getFirstConditionAsRootType();
+    });
+    if (T != E)
+      F = ++T; // step past that rule (it keeps its position)
+  }
+  GlobalISelEmitter::optimizeRules<GroupMatcher>(Matchers, MatcherStorage)
+      .swap(Matchers); // replace the nested matchers with the grouped list
+  GlobalISelEmitter::optimizeRules<SwitchMatcher>(Matchers, MatcherStorage)
+      .swap(Matchers);
+}
+
 void GlobalISelEmitter::run(raw_ostream &OS) {
   if (!UseCoverageFile.empty()) {
     RuleCoverage = CodeGenCoverage();
@@ -3617,6 +4252,11 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
     }
   }
 
+  // Track the run-time opcode values
+  gatherOpcodeValues();
+  // Track the run-time LLT ID values
+  gatherTypeIDValues();
+
   // Track the GINodeEquiv definitions.
   gatherNodeEquivs();
 
@@ -3652,14 +4292,19 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
     Rules.push_back(std::move(MatcherOrErr.get()));
   }
 
+  // Comparison function to order records by name.
+  auto orderByName = [](const Record *A, const Record *B) {
+    return A->getName() < B->getName();
+  };
+
   std::vector<Record *> ComplexPredicates =
       RK.getAllDerivedDefinitions("GIComplexOperandMatcher");
-  std::sort(ComplexPredicates.begin(), ComplexPredicates.end(),
-            [](const Record *A, const Record *B) {
-              if (A->getName() < B->getName())
-                return true;
-              return false;
-            });
+  llvm::sort(ComplexPredicates.begin(), ComplexPredicates.end(), orderByName);
+
+  std::vector<Record *> CustomRendererFns =
+      RK.getAllDerivedDefinitions("GICustomOperandRenderer");
+  llvm::sort(CustomRendererFns.begin(), CustomRendererFns.end(), orderByName);
+
   unsigned MaxTemporaries = 0;
   for (const auto &Rule : Rules)
     MaxTemporaries = std::max(MaxTemporaries, Rule.countRendererFns());
@@ -3677,21 +4322,33 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
         "ComplexRendererFns("
      << Target.getName()
      << "InstructionSelector::*ComplexMatcherMemFn)(MachineOperand &) const;\n"
-     << "  const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> "
-        "MatcherInfo;\n"
-     << "  static " << Target.getName()
+
+     << "  typedef void(" << Target.getName()
+     << "InstructionSelector::*CustomRendererFn)(MachineInstrBuilder &, const "
+        "MachineInstr&) "
+        "const;\n"
+     << "  const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, "
+        "CustomRendererFn> "
+        "ISelInfo;\n";
+  OS << "  static " << Target.getName()
      << "InstructionSelector::ComplexMatcherMemFn ComplexPredicateFns[];\n"
-     << "bool testImmPredicate_I64(unsigned PredicateID, int64_t Imm) const "
+     << "  static " << Target.getName()
+     << "InstructionSelector::CustomRendererFn CustomRenderers[];\n"
+     << "  bool testImmPredicate_I64(unsigned PredicateID, int64_t Imm) const "
         "override;\n"
-     << "bool testImmPredicate_APInt(unsigned PredicateID, const APInt &Imm) "
+     << "  bool testImmPredicate_APInt(unsigned PredicateID, const APInt &Imm) "
         "const override;\n"
-     << "bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat "
+     << "  bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat "
         "&Imm) const override;\n"
+     << "  const int64_t *getMatchTable() const override;\n"
+     << "  bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI) "
+        "const override;\n"
      << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n";
 
   OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n"
      << ", State(" << MaxTemporaries << "),\n"
-     << "MatcherInfo({TypeObjects, FeatureBitsets, ComplexPredicateFns})\n"
+     << "ISelInfo(TypeObjects, NumTypeObjects, FeatureBitsets"
+     << ", ComplexPredicateFns, CustomRenderers)\n"
      << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n";
 
   OS << "#ifdef GET_GLOBALISEL_IMPL\n";
@@ -3723,9 +4380,9 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
   // Emit a table containing the LLT objects needed by the matcher and an enum
   // for the matcher to reference them with.
   std::vector<LLTCodeGen> TypeObjects;
-  for (const auto &Ty : LLTOperandMatcher::KnownTypes)
+  for (const auto &Ty : KnownTypes)
     TypeObjects.push_back(Ty);
-  std::sort(TypeObjects.begin(), TypeObjects.end());
+  llvm::sort(TypeObjects.begin(), TypeObjects.end());
   OS << "// LLT Objects.\n"
      << "enum {\n";
   for (const auto &TypeObject : TypeObjects) {
@@ -3733,7 +4390,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
     TypeObject.emitCxxEnumValue(OS);
     OS << ",\n";
   }
-  OS << "};\n"
+  OS << "};\n";
+  OS << "const static size_t NumTypeObjects = " << TypeObjects.size() << ";\n"
      << "const static LLT TypeObjects[] = {\n";
   for (const auto &TypeObject : TypeObjects) {
     OS << "  ";
@@ -3747,7 +4405,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
   std::vector<std::vector<Record *>> FeatureBitsets;
   for (auto &Rule : Rules)
     FeatureBitsets.push_back(Rule.getRequiredFeatures());
-  std::sort(
+  llvm::sort(
       FeatureBitsets.begin(), FeatureBitsets.end(),
       [&](const std::vector<Record *> &A, const std::vector<Record *> &B) {
         if (A.size() < B.size())
@@ -3798,18 +4456,19 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
   OS << "};\n"
      << "// See constructor for table contents\n\n";
 
-  emitImmPredicates(OS, "I64", "int64_t", [](const Record *R) {
+  emitImmPredicateFns(OS, "I64", "int64_t", [](const Record *R) {
     bool Unset;
     return !R->getValueAsBitOrUnset("IsAPFloat", Unset) &&
            !R->getValueAsBit("IsAPInt");
   });
-  emitImmPredicates(OS, "APFloat", "const APFloat &", [](const Record *R) {
+  emitImmPredicateFns(OS, "APFloat", "const APFloat &", [](const Record *R) {
     bool Unset;
     return R->getValueAsBitOrUnset("IsAPFloat", Unset);
   });
-  emitImmPredicates(OS, "APInt", "const APInt &", [](const Record *R) {
+  emitImmPredicateFns(OS, "APInt", "const APInt &", [](const Record *R) {
     return R->getValueAsBit("IsAPInt");
   });
+  emitMIPredicateFns(OS);
   OS << "\n";
 
   OS << Target.getName() << "InstructionSelector::ComplexMatcherMemFn\n"
@@ -3821,22 +4480,30 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
        << ", // " << Record->getName() << "\n";
   OS << "};\n\n";
 
-  OS << "bool " << Target.getName()
-     << "InstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage "
-        "&CoverageInfo) const {\n"
-     << "  MachineFunction &MF = *I.getParent()->getParent();\n"
-     << "  MachineRegisterInfo &MRI = MF.getRegInfo();\n"
-     << "  // FIXME: This should be computed on a per-function basis rather "
-        "than per-insn.\n"
-     << "  AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, "
-        "&MF);\n"
-     << "  const PredicateBitset AvailableFeatures = getAvailableFeatures();\n"
-     << "  NewMIVector OutMIs;\n"
-     << "  State.MIs.clear();\n"
-     << "  State.MIs.push_back(&I);\n\n";
+  OS << "// Custom renderers.\n"
+     << "enum {\n"
+     << "  GICR_Invalid,\n";
+  for (const auto &Record : CustomRendererFns)
+    OS << "  GICR_" << Record->getValueAsString("RendererFn") << ", \n";
+  OS << "};\n";
+
+  OS << Target.getName() << "InstructionSelector::CustomRendererFn\n"
+     << Target.getName() << "InstructionSelector::CustomRenderers[] = {\n"
+     << "  nullptr, // GICP_Invalid\n";
+  for (const auto &Record : CustomRendererFns)
+    OS << "  &" << Target.getName()
+       << "InstructionSelector::" << Record->getValueAsString("RendererFn")
+       << ", // " << Record->getName() << "\n";
+  OS << "};\n\n";
 
   std::stable_sort(Rules.begin(), Rules.end(), [&](const RuleMatcher &A,
                                                    const RuleMatcher &B) {
+    int ScoreA = RuleMatcherScores[A.getRuleID()];
+    int ScoreB = RuleMatcherScores[B.getRuleID()];
+    if (ScoreA > ScoreB)
+      return true;
+    if (ScoreB > ScoreA)
+      return false;
     if (A.isHigherPriorityThan(B)) {
       assert(!B.isHigherPriorityThan(A) && "Cannot be more important "
                                            "and less important at "
@@ -3845,32 +4512,37 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
     }
     return false;
   });
-  std::vector<std::unique_ptr<GroupMatcher>> StorageGroupMatcher;
 
-  std::vector<Matcher *> InputRules;
-  for (Matcher &Rule : Rules)
-    InputRules.push_back(&Rule);
-
-  std::vector<Matcher *> OptRules =
-      OptimizeMatchTable ? optimizeRules(InputRules, StorageGroupMatcher)
-                         : InputRules;
+  OS << "bool " << Target.getName()
+     << "InstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage "
+        "&CoverageInfo) const {\n"
+     << "  MachineFunction &MF = *I.getParent()->getParent();\n"
+     << "  MachineRegisterInfo &MRI = MF.getRegInfo();\n"
+     << "  // FIXME: This should be computed on a per-function basis rather "
+        "than per-insn.\n"
+     << "  AvailableFunctionFeatures = computeAvailableFunctionFeatures(&STI, "
+        "&MF);\n"
+     << "  const PredicateBitset AvailableFeatures = getAvailableFeatures();\n"
+     << "  NewMIVector OutMIs;\n"
+     << "  State.MIs.clear();\n"
+     << "  State.MIs.push_back(&I);\n\n"
+     << "  if (executeMatchTable(*this, OutMIs, State, ISelInfo"
+     << ", getMatchTable(), TII, MRI, TRI, RBI, AvailableFeatures"
+     << ", CoverageInfo)) {\n"
+     << "    return true;\n"
+     << "  }\n\n"
+     << "  return false;\n"
+     << "}\n\n";
 
-  MatchTable Table(0);
-  for (Matcher *Rule : OptRules) {
-    Rule->emit(Table);
-    ++NumPatternEmitted;
-  }
-  Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
+  const MatchTable Table =
+      buildMatchTable(Rules, OptimizeMatchTable, GenerateCoverage);
+  OS << "const int64_t *" << Target.getName()
+     << "InstructionSelector::getMatchTable() const {\n";
   Table.emitDeclaration(OS);
-  OS << "  if (executeMatchTable(*this, OutMIs, State, MatcherInfo, ";
+  OS << "  return ";
   Table.emitUse(OS);
-  OS << ", TII, MRI, TRI, RBI, AvailableFeatures, CoverageInfo)) {\n"
-     << "    return true;\n"
-     << "  }\n\n";
-
-  OS << "  return false;\n"
-     << "}\n"
-     << "#endif // ifdef GET_GLOBALISEL_IMPL\n";
+  OS << ";\n}\n";
+  OS << "#endif // ifdef GET_GLOBALISEL_IMPL\n";
 
   OS << "#ifdef GET_GLOBALISEL_PREDICATES_DECL\n"
      << "PredicateBitset AvailableModuleFeatures;\n"
@@ -3899,137 +4571,290 @@ void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) {
         Predicate, SubtargetFeatureInfo(Predicate, SubtargetFeatures.size()));
 }
 
-TreePatternNode *GlobalISelEmitter::fixupPatternNode(TreePatternNode *N) {
-  if (!N->isLeaf()) {
-    for (unsigned I = 0, E = N->getNumChildren(); I < E; ++I) {
-      TreePatternNode *OrigChild = N->getChild(I);
-      TreePatternNode *NewChild = fixupPatternNode(OrigChild);
-      if (OrigChild != NewChild)
-        N->setChild(I, NewChild);
+void RuleMatcher::optimize() {
+  for (auto &Item : InsnVariableIDs) {
+    InstructionMatcher &InsnMatcher = *Item.first;
+    for (auto &OM : InsnMatcher.operands()) {
+      // Complex Patterns are usually expensive and they relatively rarely fail
+      // on their own: more often we end up throwing away all the work done by a
+      // matching part of a complex pattern because some other part of the
+      // enclosing pattern didn't match. All of this makes it beneficial to
+      // delay complex patterns until the very end of the rule matching,
+      // especially for targets having lots of complex patterns.
+      for (auto &OP : OM->predicates())
+        if (isa<ComplexPatternOperandMatcher>(OP))
+          EpilogueMatchers.emplace_back(std::move(OP));
+      OM->eraseNullPredicates();
     }
+    InsnMatcher.optimize();
+  }
+  llvm::sort(
+      EpilogueMatchers.begin(), EpilogueMatchers.end(),
+      [](const std::unique_ptr<PredicateMatcher> &L,
+         const std::unique_ptr<PredicateMatcher> &R) {
+        return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
+               std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
+      });
+}
 
-    if (N->getOperator()->getName() == "ld") {
-      // If it's a signext-load we need to adapt the pattern slightly. We need
-      // to split the node into (sext (ld ...)), remove the <<signext>> predicate,
-      // and then apply the <<signextTY>> predicate by updating the result type
-      // of the load.
-      //
-      // For example:
-      //   (ld:[i32] [iPTR])<<unindexed>><<signext>><<signexti16>>
-      // must be transformed into:
-      //   (sext:[i32] (ld:[i16] [iPTR])<<unindexed>>)
-      //
-      // Likewise for zeroext-load and anyext-load.
-
-      std::vector<TreePredicateFn> Predicates;
-      bool IsSignExtLoad = false;
-      bool IsZeroExtLoad = false;
-      bool IsAnyExtLoad = false;
-      Record *MemVT = nullptr;
-      for (const auto &P : N->getPredicateFns()) {
-        if (P.isLoad() && P.isSignExtLoad()) {
-          IsSignExtLoad = true;
-          continue;
-        }
-        if (P.isLoad() && P.isZeroExtLoad()) {
-          IsZeroExtLoad = true;
-          continue;
-        }
-        if (P.isLoad() && P.isAnyExtLoad()) {
-          IsAnyExtLoad = true;
-          continue;
-        }
-        if (P.isLoad() && P.getMemoryVT()) {
-          MemVT = P.getMemoryVT();
-          continue;
-        }
-        Predicates.push_back(P);
-      }
-
-      if ((IsSignExtLoad || IsZeroExtLoad || IsAnyExtLoad) && MemVT) {
-        assert((IsSignExtLoad + IsZeroExtLoad + IsAnyExtLoad) == 1 &&
-               "IsSignExtLoad, IsZeroExtLoad, IsAnyExtLoad are mutually exclusive");
-        TreePatternNode *Ext = new TreePatternNode(
-            RK.getDef(IsSignExtLoad ? "sext"
-                                    : IsZeroExtLoad ? "zext" : "anyext"),
-            {N}, 1);
-        Ext->setType(0, N->getType(0));
-        N->clearPredicateFns();
-        N->setPredicateFns(Predicates);
-        N->setType(0, getValueType(MemVT));
-        return Ext;
-      }
-    }
-  }
-
-  return N;
+bool RuleMatcher::hasFirstCondition() const {
+  if (insnmatchers_empty())
+    return false;
+  InstructionMatcher &Matcher = insnmatchers_front();
+  if (!Matcher.predicates_empty())
+    return true;
+  for (auto &OM : Matcher.operands())
+    for (auto &OP : OM->predicates())
+      if (!isa<InstructionOperandMatcher>(OP))
+        return true;
+  return false;
 }
 
-void GlobalISelEmitter::fixupPatternTrees(TreePattern *P) {
-  for (unsigned I = 0, E = P->getNumTrees(); I < E; ++I) {
-    TreePatternNode *OrigTree = P->getTree(I);
-    TreePatternNode *NewTree = fixupPatternNode(OrigTree);
-    if (OrigTree != NewTree)
-      P->setTree(I, NewTree);
-  }
+const PredicateMatcher &RuleMatcher::getFirstCondition() const {
+  assert(!insnmatchers_empty() &&
+         "Trying to get a condition from an empty RuleMatcher");
+
+  InstructionMatcher &Matcher = insnmatchers_front();
+  if (!Matcher.predicates_empty())
+    return **Matcher.predicates_begin();
+  // If there is no more predicate on the instruction itself, look at its
+  // operands.
+  for (auto &OM : Matcher.operands())
+    for (auto &OP : OM->predicates())
+      if (!isa<InstructionOperandMatcher>(OP))
+        return *OP;
+
+  llvm_unreachable("Trying to get a condition from an InstructionMatcher with "
+                   "no conditions");
 }
 
-std::unique_ptr<PredicateMatcher> RuleMatcher::forgetFirstCondition() {
+std::unique_ptr<PredicateMatcher> RuleMatcher::popFirstCondition() {
   assert(!insnmatchers_empty() &&
-         "Trying to forget something that does not exist");
+         "Trying to pop a condition from an empty RuleMatcher");
 
   InstructionMatcher &Matcher = insnmatchers_front();
-  std::unique_ptr<PredicateMatcher> Condition;
   if (!Matcher.predicates_empty())
-    Condition = Matcher.predicates_pop_front();
-  if (!Condition) {
-    // If there is no more predicate on the instruction itself, look at its
-    // operands.
-    assert(!Matcher.operands_empty() &&
-           "Empty instruction should have been discarded");
-    OperandMatcher &OpMatcher = **Matcher.operands_begin();
-    assert(!OpMatcher.predicates_empty() && "no operand constraint");
-    Condition = OpMatcher.predicates_pop_front();
-    // If this operand is free of constraints, rip it off.
-    if (OpMatcher.predicates_empty())
-      Matcher.pop_front();
-  }
-  // Rip the instruction off when it is empty.
-  if (Matcher.operands_empty() && Matcher.predicates_empty())
-    insnmatchers_pop_front();
-  return Condition;
-}
-
-bool GroupMatcher::lastConditionMatches(
+    return Matcher.predicates_pop_front();
+  // If there is no more predicate on the instruction itself, look at its
+  // operands.
+  for (auto &OM : Matcher.operands())
+    for (auto &OP : OM->predicates())
+      if (!isa<InstructionOperandMatcher>(OP)) {
+        std::unique_ptr<PredicateMatcher> Result = std::move(OP);
+        OM->eraseNullPredicates();
+        return Result;
+      }
+
+  llvm_unreachable("Trying to pop a condition from an InstructionMatcher with "
+                   "no conditions");
+}
+
+bool GroupMatcher::candidateConditionMatches(
     const PredicateMatcher &Predicate) const {
-  const auto &LastCondition = conditions_back();
-  return Predicate.isIdentical(*LastCondition);
+
+  if (empty()) {
+    // Sharing predicates for nested instructions is not supported yet as we
+    // currently don't hoist the GIM_RecordInsn's properly, therefore we can
+    // only work on the original root instruction (InsnVarID == 0):
+    if (Predicate.getInsnVarID() != 0)
+      return false;
+    // ... otherwise an empty group can handle any predicate with no specific
+    // requirements:
+    return true;
+  }
+
+  const Matcher &Representative = **Matchers.begin();
+  const auto &RepresentativeCondition = Representative.getFirstCondition();
+  // ... if not empty, the group can only accommodate matchers with the exact
+  // same first condition:
+  return Predicate.isIdentical(RepresentativeCondition);
+}
+
+bool GroupMatcher::addMatcher(Matcher &Candidate) {
+  if (!Candidate.hasFirstCondition())
+    return false;
+
+  const PredicateMatcher &Predicate = Candidate.getFirstCondition();
+  if (!candidateConditionMatches(Predicate))
+    return false;
+
+  Matchers.push_back(&Candidate);
+  return true;
+}
+
+void GroupMatcher::finalize() {
+  assert(Conditions.empty() && "Already finalized?");
+  if (empty())
+    return;
+
+  Matcher &FirstRule = **Matchers.begin();
+  for (;;) { // hoist shared leading conditions until the rules diverge
+    // All the checks are expected to succeed during the first iteration:
+    for (const auto &Rule : Matchers)
+      if (!Rule->hasFirstCondition())
+        return;
+    const auto &FirstCondition = FirstRule.getFirstCondition();
+    for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
+      if (!Matchers[I]->getFirstCondition().isIdentical(FirstCondition))
+        return;
+
+    Conditions.push_back(FirstRule.popFirstCondition()); // keep one copy
+    for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
+      Matchers[I]->popFirstCondition(); // discard the identical copies
+  }
 }
 
 void GroupMatcher::emit(MatchTable &Table) {
-  unsigned LabelID = Table.allocateLabelID();
-  if (!conditions_empty()) {
+  unsigned LabelID = ~0U;
+  if (!Conditions.empty()) {
+    LabelID = Table.allocateLabelID();
     Table << MatchTable::Opcode("GIM_Try", +1)
           << MatchTable::Comment("On fail goto")
           << MatchTable::JumpTarget(LabelID) << MatchTable::LineBreak;
-    for (auto &Condition : Conditions)
-      Condition->emitPredicateOpcodes(
-          Table, *static_cast<RuleMatcher *>(*Rules.begin()));
   }
-  // Emit the conditions.
-  // Then checks apply the rules.
-  for (const auto &Rule : Rules)
-    Rule->emit(Table);
-  // If we don't succeeded for that block, that means we are not going to select
-  // this instruction.
-  if (!conditions_empty()) {
-    Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
-    Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
+  for (auto &Condition : Conditions)
+    Condition->emitPredicateOpcodes(
+        Table, *static_cast<RuleMatcher *>(*Matchers.begin()));
+
+  for (const auto &M : Matchers)
+    M->emit(Table);
+
+  // Exit the group
+  if (!Conditions.empty())
+    Table << MatchTable::Opcode("GIM_Reject", -1) << MatchTable::LineBreak
           << MatchTable::Label(LabelID);
+}
+
+bool SwitchMatcher::isSupportedPredicateType(const PredicateMatcher &P) {
+  return isa<InstructionOpcodeMatcher>(P) || isa<LLTOperandMatcher>(P);
+}
+
+bool SwitchMatcher::candidateConditionMatches(
+    const PredicateMatcher &Predicate) const {
+
+  if (empty()) {
+    // Sharing predicates for nested instructions is not supported yet as we
+    // currently don't hoist the GIM_RecordInsn's properly, therefore we can
+    // only work on the original root instruction (InsnVarID == 0):
+    if (Predicate.getInsnVarID() != 0)
+      return false;
+    // ... while an attempt to add even a root matcher to an empty SwitchMatcher
+    // could fail as not all the types of conditions are supported:
+    if (!isSupportedPredicateType(Predicate))
+      return false;
+    // ... or the condition might not have a proper implementation of
+    // getValue() / isIdenticalDownToValue() yet:
+    if (!Predicate.hasValue())
+      return false;
+    // ... otherwise an empty Switch can accommodate the condition with no
+    // further requirements:
+    return true;
+  }
+
+  const Matcher &CaseRepresentative = **Matchers.begin();
+  const auto &RepresentativeCondition = CaseRepresentative.getFirstCondition();
+  // Switch-cases must share the same kind of condition and path to the value it
+  // checks:
+  if (!Predicate.isIdenticalDownToValue(RepresentativeCondition))
+    return false;
+
+  const auto Value = Predicate.getValue();
+  // ... but be unique with respect to the actual value they check:
+  return Values.count(Value) == 0;
+}
+
+bool SwitchMatcher::addMatcher(Matcher &Candidate) {
+  if (!Candidate.hasFirstCondition())
+    return false;
+
+  const PredicateMatcher &Predicate = Candidate.getFirstCondition();
+  if (!candidateConditionMatches(Predicate))
+    return false;
+  const auto Value = Predicate.getValue();
+  Values.insert(Value);
+
+  Matchers.push_back(&Candidate);
+  return true;
+}
+
+void SwitchMatcher::finalize() {
+  assert(Condition == nullptr && "Already finalized");
+  assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
+  if (empty())
+    return;
+
+  std::stable_sort(Matchers.begin(), Matchers.end(),
+                   [](const Matcher *L, const Matcher *R) {
+                     return L->getFirstCondition().getValue() <
+                            R->getFirstCondition().getValue();
+                   });
+  Condition = Matchers[0]->popFirstCondition();
+  for (unsigned I = 1, E = Values.size(); I < E; ++I)
+    Matchers[I]->popFirstCondition();
+}
+
+void SwitchMatcher::emitPredicateSpecificOpcodes(const PredicateMatcher &P,
+                                                 MatchTable &Table) {
+  assert(isSupportedPredicateType(P) && "Predicate type is not supported");
+
+  if (const auto *Condition = dyn_cast<InstructionOpcodeMatcher>(&P)) {
+    Table << MatchTable::Opcode("GIM_SwitchOpcode") << MatchTable::Comment("MI")
+          << MatchTable::IntValue(Condition->getInsnVarID());
+    return;
+  }
+  if (const auto *Condition = dyn_cast<LLTOperandMatcher>(&P)) {
+    Table << MatchTable::Opcode("GIM_SwitchType") << MatchTable::Comment("MI")
+          << MatchTable::IntValue(Condition->getInsnVarID())
+          << MatchTable::Comment("Op")
+          << MatchTable::IntValue(Condition->getOpIdx());
+    return;
+  }
+
+  llvm_unreachable("emitPredicateSpecificOpcodes is broken: can not handle a "
+                   "predicate type that is claimed to be supported");
+}
+
+void SwitchMatcher::emit(MatchTable &Table) {
+  assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
+  if (empty())
+    return; // An empty switch emits nothing.
+  assert(Condition != nullptr &&
+         "Broken SwitchMatcher, hasn't been finalized?");
+
+  std::vector<unsigned> LabelIDs(Values.size()); // One label per case.
+  std::generate(LabelIDs.begin(), LabelIDs.end(),
+                [&Table]() { return Table.allocateLabelID(); });
+  const unsigned Default = Table.allocateLabelID(); // Placed after all cases.
+
+  const int64_t LowerBound = Values.begin()->getRawValue();
+  const int64_t UpperBound = Values.rbegin()->getRawValue() + 1; // Exclusive.
+
+  emitPredicateSpecificOpcodes(*Condition, Table);
+
+  Table << MatchTable::Comment("[") << MatchTable::IntValue(LowerBound)
+        << MatchTable::IntValue(UpperBound) << MatchTable::Comment(")")
+        << MatchTable::Comment("default:") << MatchTable::JumpTarget(Default);
+
+  int64_t J = LowerBound; // Next raw value that still needs a table entry.
+  auto VI = Values.begin();
+  for (unsigned I = 0, E = Values.size(); I < E; ++I) {
+    auto V = *VI++; // NOTE(review): assumes Values iterates in ascending order.
+    while (J++ < V.getRawValue()) // Pad the gap before V with 0 entries.
+      Table << MatchTable::IntValue(0);
+    V.turnIntoComment();
+    Table << MatchTable::LineBreak << V << MatchTable::JumpTarget(LabelIDs[I]);
+  }
+  Table << MatchTable::LineBreak;
+
+  for (unsigned I = 0, E = Values.size(); I < E; ++I) {
+    Table << MatchTable::Label(LabelIDs[I]);
+    Matchers[I]->emit(Table); // Case body; a GIM_Reject is emitted after it.
+    Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
   }
+  Table << MatchTable::Label(Default);
 }
 
-unsigned OperandMatcher::getInsnVarID() const { return Insn.getVarID(); }
+unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); }
 
 } // end anonymous namespace
 
diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp
index d5a181e130a5..7d1f71cc2647 100644
--- a/utils/TableGen/InfoByHwMode.cpp
+++ b/utils/TableGen/InfoByHwMode.cpp
@@ -84,7 +84,7 @@ void ValueTypeByHwMode::writeToStream(raw_ostream &OS) const {
   std::vector<const PairType*> Pairs;
   for (const auto &P : Map)
     Pairs.push_back(&P);
-  std::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+  llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
 
   OS << '{';
   for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
@@ -176,7 +176,7 @@ void RegSizeInfoByHwMode::writeToStream(raw_ostream &OS) const {
   std::vector<const PairType*> Pairs;
   for (const auto &P : Map)
     Pairs.push_back(&P);
-  std::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
+  llvm::sort(Pairs.begin(), Pairs.end(), deref<std::less<PairType>>());
 
   OS << '{';
   for (unsigned i = 0, e = Pairs.size(); i != e; ++i) {
diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h
index b2e217498888..4838198e704d 100644
--- a/utils/TableGen/InfoByHwMode.h
+++ b/utils/TableGen/InfoByHwMode.h
@@ -16,7 +16,7 @@
 #define LLVM_UTILS_TABLEGEN_INFOBYHWMODE_H
 
 #include "CodeGenHwModes.h"
-#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/MachineValueType.h"
 
 #include <map>
 #include <set>
diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp
index fa9ee9569427..65cb28cd17a3 100644
--- a/utils/TableGen/InstrDocsEmitter.cpp
+++ b/utils/TableGen/InstrDocsEmitter.cpp
@@ -109,6 +109,7 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
     FLAG(isBarrier)
     FLAG(isCall)
     FLAG(isAdd)
+    FLAG(isTrap)
     FLAG(canFoldAsLoad)
     FLAG(mayLoad)
     //FLAG(mayLoad_Unset) // Deliberately omitted.
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 379e3245d066..a492daac0d09 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -16,6 +16,7 @@
 #include "CodeGenInstruction.h"
 #include "CodeGenSchedule.h"
 #include "CodeGenTarget.h"
+#include "PredicateExpander.h"
 #include "SequenceToOffsetTable.h"
 #include "TableGenBackends.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -59,6 +60,17 @@ private:
   typedef std::map<std::map<unsigned, unsigned>,
                    std::vector<std::string>> OpNameMapTy;
   typedef std::map<std::string, unsigned>::iterator StrUintMapIter;
+
+  /// Generate member functions in the target-specific GenInstrInfo class.
+  ///
+  /// This method is used to custom expand TIIPredicate definitions.
+  /// See file llvm/Target/TargetInstrPredicate.td for a description of what is
+  /// a TIIPredicate and how to use it.
+  void emitTIIHelperMethods(raw_ostream &OS);
+
+  /// Expand TIIPredicate definitions to functions that accept a const MCInst
+  /// reference.
+  void emitMCIIHelperMethods(raw_ostream &OS);
   void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
                   Record *InstrInfo,
                   std::map<std::vector<Record*>, unsigned> &EL,
@@ -339,6 +351,74 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
   OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
 }
 
+void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS) {
+  RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
+  if (TIIPredicates.empty())
+    return; // No TIIPredicate definitions: emit nothing at all.
+
+  CodeGenTarget &Target = CDP.getTargetInfo();
+  const StringRef TargetName = Target.getName();
+  formatted_raw_ostream FOS(OS);
+
+  FOS << "#ifdef GET_GENINSTRINFO_MC_DECL\n"; // Section 1: prototypes.
+  FOS << "#undef GET_GENINSTRINFO_MC_DECL\n\n";
+
+  FOS << "namespace llvm {\n";
+  FOS << "class MCInst;\n\n";
+
+  FOS << "namespace " << TargetName << "_MC {\n\n";
+
+  for (const Record *Rec : TIIPredicates) {
+    FOS << "bool " << Rec->getValueAsString("FunctionName")
+        << "(const MCInst &MI);\n";
+  }
+
+  FOS << "\n} // end " << TargetName << "_MC namespace\n";
+  FOS << "} // end llvm namespace\n\n";
+
+  FOS << "#endif // GET_GENINSTRINFO_MC_DECL\n\n";
+
+  FOS << "#ifdef GET_GENINSTRINFO_MC_HELPERS\n"; // Section 2: definitions.
+  FOS << "#undef GET_GENINSTRINFO_MC_HELPERS\n\n";
+
+  FOS << "namespace llvm {\n";
+  FOS << "namespace " << TargetName << "_MC {\n\n";
+
+  PredicateExpander PE;
+  PE.setExpandForMC(true); // Select the MC expansion mode.
+  for (const Record *Rec : TIIPredicates) {
+    FOS << "bool " << Rec->getValueAsString("FunctionName");
+    FOS << "(const MCInst &MI) {\n";
+    FOS << "  return ";
+    PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
+    FOS << ";\n}\n";
+  }
+
+  FOS << "\n} // end " << TargetName << "_MC namespace\n";
+  FOS << "} // end llvm namespace\n\n";
+
+  FOS << "#endif // GET_GENINSTRINFO_MC_HELPERS\n"; // Must match the #ifdef.
+}
+
+void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS) {
+  RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
+  if (TIIPredicates.empty())
+    return;
+
+  formatted_raw_ostream FOS(OS);
+  PredicateExpander PE;
+  PE.setExpandForMC(false);
+  PE.setIndentLevel(2);
+
+  for (const Record *Rec : TIIPredicates) {
+    FOS << "\n  static bool " << Rec->getValueAsString("FunctionName");
+    FOS << "(const MachineInstr &MI) {\n";
+    FOS << "    return ";
+    PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
+    FOS << ";\n  }\n";
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Main Output.
 //===----------------------------------------------------------------------===//
@@ -435,9 +515,11 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
      << "  explicit " << ClassName
      << "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1, int ReturnOpcode = -1);\n"
-     << "  ~" << ClassName << "() override = default;\n"
-     << "};\n";
-  OS << "} // end llvm namespace\n";
+     << "  ~" << ClassName << "() override = default;\n";
+
+  emitTIIHelperMethods(OS);
+
+  OS << "\n};\n} // end llvm namespace\n";
 
   OS << "#endif // GET_INSTRINFO_HEADER\n\n";
 
@@ -461,6 +543,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   emitOperandNameMappings(OS, Target, NumberedInstructions);
 
   emitOperandTypesEnum(OS, Target);
+
+  emitMCIIHelperMethods(OS);
 }
 
 void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
@@ -480,6 +564,8 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
      << Inst.TheDef->getValueAsInt("Size") << ",\t"
      << SchedModels.getSchedClassIdx(Inst) << ",\t0";
 
+  CodeGenTarget &Target = CDP.getTargetInfo();
+
   // Emit all of the target independent flags...
   if (Inst.isPseudo)           OS << "|(1ULL<<MCID::Pseudo)";
   if (Inst.isReturn)           OS << "|(1ULL<<MCID::Return)";
@@ -487,8 +573,10 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isIndirectBranch)   OS << "|(1ULL<<MCID::IndirectBranch)";
   if (Inst.isCompare)          OS << "|(1ULL<<MCID::Compare)";
   if (Inst.isMoveImm)          OS << "|(1ULL<<MCID::MoveImm)";
+  if (Inst.isMoveReg)          OS << "|(1ULL<<MCID::MoveReg)";
   if (Inst.isBitcast)          OS << "|(1ULL<<MCID::Bitcast)";
   if (Inst.isAdd)              OS << "|(1ULL<<MCID::Add)";
+  if (Inst.isTrap)             OS << "|(1ULL<<MCID::Trap)";
   if (Inst.isSelect)           OS << "|(1ULL<<MCID::Select)";
   if (Inst.isBarrier)          OS << "|(1ULL<<MCID::Barrier)";
   if (Inst.hasDelaySlot)       OS << "|(1ULL<<MCID::DelaySlot)";
@@ -508,8 +596,10 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.Operands.isVariadic)OS << "|(1ULL<<MCID::Variadic)";
   if (Inst.hasSideEffects)     OS << "|(1ULL<<MCID::UnmodeledSideEffects)";
   if (Inst.isAsCheapAsAMove)   OS << "|(1ULL<<MCID::CheapAsAMove)";
-  if (Inst.hasExtraSrcRegAllocReq) OS << "|(1ULL<<MCID::ExtraSrcRegAllocReq)";
-  if (Inst.hasExtraDefRegAllocReq) OS << "|(1ULL<<MCID::ExtraDefRegAllocReq)";
+  if (!Target.getAllowRegisterRenaming() || Inst.hasExtraSrcRegAllocReq)
+    OS << "|(1ULL<<MCID::ExtraSrcRegAllocReq)";
+  if (!Target.getAllowRegisterRenaming() || Inst.hasExtraDefRegAllocReq)
+    OS << "|(1ULL<<MCID::ExtraDefRegAllocReq)";
   if (Inst.isRegSequence) OS << "|(1ULL<<MCID::RegSequence)";
   if (Inst.isExtractSubreg) OS << "|(1ULL<<MCID::ExtractSubreg)";
   if (Inst.isInsertSubreg) OS << "|(1ULL<<MCID::InsertSubreg)";
@@ -550,7 +640,6 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   else
     OS << "OperandInfo" << OpInfo.find(OperandInfo)->second;
 
-  CodeGenTarget &Target = CDP.getTargetInfo();
   if (Inst.HasComplexDeprecationPredicate)
     // Emit a function pointer to the complex predicate method.
     OS << ", -1 "
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index ba793ad9b938..06e44e3b57c1 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -34,7 +34,7 @@ public:
   IntrinsicEmitter(RecordKeeper &R, bool T)
     : Records(R), TargetOnly(T) {}
 
-  void run(raw_ostream &OS);
+  void run(raw_ostream &OS, bool Enums);
 
   void EmitPrefix(raw_ostream &OS);
 
@@ -56,7 +56,7 @@ public:
 // IntrinsicEmitter Implementation
 //===----------------------------------------------------------------------===//
 
-void IntrinsicEmitter::run(raw_ostream &OS) {
+void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
   emitSourceFileHeader("Intrinsic Function Source Fragment", OS);
 
   CodeGenIntrinsicTable Ints(Records, TargetOnly);
@@ -66,29 +66,31 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
 
   EmitPrefix(OS);
 
-  // Emit the enum information.
-  EmitEnumInfo(Ints, OS);
-
-  // Emit the target metadata.
-  EmitTargetInfo(Ints, OS);
+  if (Enums) {
+    // Emit the enum information.
+    EmitEnumInfo(Ints, OS);
+  } else {
+    // Emit the target metadata.
+    EmitTargetInfo(Ints, OS);
 
-  // Emit the intrinsic ID -> name table.
-  EmitIntrinsicToNameTable(Ints, OS);
+    // Emit the intrinsic ID -> name table.
+    EmitIntrinsicToNameTable(Ints, OS);
 
-  // Emit the intrinsic ID -> overload table.
-  EmitIntrinsicToOverloadTable(Ints, OS);
+    // Emit the intrinsic ID -> overload table.
+    EmitIntrinsicToOverloadTable(Ints, OS);
 
-  // Emit the intrinsic declaration generator.
-  EmitGenerator(Ints, OS);
+    // Emit the intrinsic declaration generator.
+    EmitGenerator(Ints, OS);
 
-  // Emit the intrinsic parameter attributes.
-  EmitAttributes(Ints, OS);
+    // Emit the intrinsic parameter attributes.
+    EmitAttributes(Ints, OS);
 
-  // Emit code to translate GCC builtins into LLVM intrinsics.
-  EmitIntrinsicToBuiltinMap(Ints, true, OS);
+    // Emit code to translate GCC builtins into LLVM intrinsics.
+    EmitIntrinsicToBuiltinMap(Ints, true, OS);
 
-  // Emit code to translate MS builtins into LLVM intrinsics.
-  EmitIntrinsicToBuiltinMap(Ints, false, OS);
+    // Emit code to translate MS builtins into LLVM intrinsics.
+    EmitIntrinsicToBuiltinMap(Ints, false, OS);
+  }
 
   EmitSuffix(OS);
 }
@@ -172,7 +174,7 @@ void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
 }
 
 
-// NOTE: This must be kept in synch with the copy in lib/VMCore/Function.cpp!
+// NOTE: This must be kept in synch with the copy in lib/IR/Function.cpp!
 enum IIT_Info {
   // Common values should be encoded with 0-15.
   IIT_Done = 0,
@@ -217,7 +219,8 @@ enum IIT_Info {
   IIT_V1024 = 37,
   IIT_STRUCT6 = 38,
   IIT_STRUCT7 = 39,
-  IIT_STRUCT8 = 40
+  IIT_STRUCT8 = 40,
+  IIT_F128 = 41
 };
 
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -240,6 +243,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
   case MVT::f16: return Sig.push_back(IIT_F16);
   case MVT::f32: return Sig.push_back(IIT_F32);
   case MVT::f64: return Sig.push_back(IIT_F64);
+  case MVT::f128: return Sig.push_back(IIT_F128);
   case MVT::token: return Sig.push_back(IIT_TOKEN);
   case MVT::Metadata: return Sig.push_back(IIT_METADATA);
   case MVT::x86mmx: return Sig.push_back(IIT_MMX);
@@ -839,6 +843,12 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
   OS << "#endif\n\n";
 }
 
-void llvm::EmitIntrinsics(RecordKeeper &RK, raw_ostream &OS, bool TargetOnly) {
-  IntrinsicEmitter(RK, TargetOnly).run(OS);
+void llvm::EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS,
+                              bool TargetOnly) {
+  IntrinsicEmitter(RK, TargetOnly).run(OS, /*Enums=*/true);
+}
+
+void llvm::EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS,
+                             bool TargetOnly) {
+  IntrinsicEmitter(RK, TargetOnly).run(OS, /*Enums=*/false);
 }
diff --git a/utils/TableGen/LLVMBuild.txt b/utils/TableGen/LLVMBuild.txt
index b0081eb588d1..66387cfa0d1a 100644
--- a/utils/TableGen/LLVMBuild.txt
+++ b/utils/TableGen/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = BuildTool
 name = tblgen
 parent = BuildTools
-required_libraries = Support TableGen
+required_libraries = Support TableGen MC
diff --git a/utils/TableGen/PredicateExpander.cpp b/utils/TableGen/PredicateExpander.cpp
new file mode 100644
index 000000000000..68eb32794a02
--- /dev/null
+++ b/utils/TableGen/PredicateExpander.cpp
@@ -0,0 +1,262 @@
+//===--------------------- PredicateExpander.cpp --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Functionalities used by the Tablegen backends to expand machine predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PredicateExpander.h"
+
+namespace llvm {
+
+void PredicateExpander::expandTrue(formatted_raw_ostream &OS) { OS << "true"; }
+void PredicateExpander::expandFalse(formatted_raw_ostream &OS) {
+  OS << "false";
+}
+
+void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
+                                              int OpIndex, int ImmVal) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+     << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+}
+
+void PredicateExpander::expandCheckImmOperand(formatted_raw_ostream &OS,
+                                              int OpIndex, StringRef ImmVal) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+     << ").getImm() " << (shouldNegate() ? "!= " : "== ") << ImmVal;
+}
+
+void PredicateExpander::expandCheckRegOperand(formatted_raw_ostream &OS,
+                                              int OpIndex, const Record *Reg) {
+  assert(Reg->isSubClassOf("Register") && "Expected a register Record!");
+
+  OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+     << ").getReg() " << (shouldNegate() ? "!= " : "== ");
+  const StringRef Str = Reg->getValueAsString("Namespace");
+  if (!Str.empty())
+    OS << Str << "::";
+  OS << Reg->getName();
+}
+
+void PredicateExpander::expandCheckInvalidRegOperand(formatted_raw_ostream &OS,
+                                                     int OpIndex) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex
+     << ").getReg() " << (shouldNegate() ? "!= " : "== ") << "0";
+}
+
+void PredicateExpander::expandCheckSameRegOperand(formatted_raw_ostream &OS,
+                                                  int First, int Second) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getOperand(" << First
+     << ").getReg() " << (shouldNegate() ? "!=" : "==") << " MI"
+     << (isByRef() ? "." : "->") << "getOperand(" << Second << ").getReg()";
+}
+
+void PredicateExpander::expandCheckNumOperands(formatted_raw_ostream &OS,
+                                               int NumOps) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getNumOperands() "
+     << (shouldNegate() ? "!= " : "== ") << NumOps;
+}
+
+void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
+                                          const Record *Inst) {
+  OS << "MI" << (isByRef() ? "." : "->") << "getOpcode() "
+     << (shouldNegate() ? "!= " : "== ") << Inst->getValueAsString("Namespace")
+     << "::" << Inst->getName();
+}
+
+void PredicateExpander::expandCheckOpcode(formatted_raw_ostream &OS,
+                                          const RecVec &Opcodes) {
+  assert(!Opcodes.empty() && "Expected at least one opcode to check!");
+  bool First = true;
+
+  if (Opcodes.size() == 1) {
+    OS << "( ";
+    expandCheckOpcode(OS, Opcodes[0]);
+    OS << " )";
+    return;
+  }
+
+  OS << '(';
+  increaseIndentLevel();
+  for (const Record *Rec : Opcodes) {
+    OS << '\n';
+    OS.PadToColumn(getIndentLevel() * 2);
+    if (!First)
+      OS << (shouldNegate() ? "&& " : "|| ");
+
+    expandCheckOpcode(OS, Rec);
+    First = false;
+  }
+
+  OS << '\n';
+  decreaseIndentLevel();
+  OS.PadToColumn(getIndentLevel() * 2);
+  OS << ')';
+}
+
+void PredicateExpander::expandCheckPseudo(formatted_raw_ostream &OS,
+                                          const RecVec &Opcodes) {
+  if (shouldExpandForMC())
+    expandFalse(OS);
+  else
+    expandCheckOpcode(OS, Opcodes);
+}
+
+void PredicateExpander::expandPredicateSequence(formatted_raw_ostream &OS,
+                                                const RecVec &Sequence,
+                                                bool IsCheckAll) {
+  assert(!Sequence.empty() && "Found an invalid empty predicate set!");
+  if (Sequence.size() == 1)
+    return expandPredicate(OS, Sequence[0]); // No grouping needed.
+
+  // Okay, there is more than one predicate in the set.
+  bool First = true;
+  OS << (shouldNegate() ? "!(" : "("); // Negation applies to the whole group.
+  increaseIndentLevel();
+
+  bool OldValue = shouldNegate();
+  setNegatePredicate(false); // Members are expanded un-negated.
+  for (const Record *Rec : Sequence) {
+    OS << '\n';
+    OS.PadToColumn(getIndentLevel() * 2);
+    if (!First)
+      OS << (IsCheckAll ? "&& " : "|| "); // Joiner precedes every later member.
+    expandPredicate(OS, Rec);
+    First = false;
+  }
+  OS << '\n';
+  decreaseIndentLevel();
+  OS.PadToColumn(getIndentLevel() * 2);
+  OS << ')';
+  setNegatePredicate(OldValue); // Restore the caller's negation state.
+}
+
+void PredicateExpander::expandTIIFunctionCall(formatted_raw_ostream &OS,
+                                              StringRef TargetName,
+                                              StringRef MethodName) {
+  OS << (shouldNegate() ? "!" : "");
+  if (shouldExpandForMC())
+    OS << TargetName << "_MC::";
+  else
+    OS << TargetName << "Gen"
+       << "InstrInfo::";
+  OS << MethodName << (isByRef() ? "(MI)" : "(*MI)");
+}
+
+void PredicateExpander::expandCheckIsRegOperand(formatted_raw_ostream &OS,
+                                                int OpIndex) {
+  OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
+     << "getOperand(" << OpIndex << ").isReg() ";
+}
+
+void PredicateExpander::expandCheckIsImmOperand(formatted_raw_ostream &OS,
+                                                int OpIndex) {
+  OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->")
+     << "getOperand(" << OpIndex << ").isImm() ";
+}
+
+void PredicateExpander::expandCheckFunctionPredicate(
+    formatted_raw_ostream &OS, StringRef MCInstFn, StringRef MachineInstrFn) {
+  OS << (shouldNegate() ? "!" : "") // Honor an enclosing CheckNot.
+     << (shouldExpandForMC() ? MCInstFn : MachineInstrFn)
+     << (isByRef() ? "(MI)" : "(*MI)");
+}
+
+void PredicateExpander::expandCheckNonPortable(formatted_raw_ostream &OS,
+                                               StringRef Code) {
+  if (shouldExpandForMC())
+    return expandFalse(OS); // The MC expansion always yields false.
+
+  OS << (shouldNegate() ? "!(" : "(") << Code << ')'; // Honor CheckNot.
+}
+
+void PredicateExpander::expandPredicate(formatted_raw_ostream &OS,
+                                        const Record *Rec) {
+  OS.flush();
+  unsigned ColNum = getIndentLevel() * 2;
+  if (OS.getColumn() < ColNum)
+    OS.PadToColumn(ColNum);
+
+  if (Rec->isSubClassOf("MCTrue")) {
+    if (shouldNegate())
+      return expandFalse(OS);
+    return expandTrue(OS);
+  }
+
+  if (Rec->isSubClassOf("MCFalse")) {
+    if (shouldNegate())
+      return expandTrue(OS);
+    return expandFalse(OS);
+  }
+
+  if (Rec->isSubClassOf("CheckNot")) {
+    flipNegatePredicate();
+    expandPredicate(OS, Rec->getValueAsDef("Pred"));
+    flipNegatePredicate();
+    return;
+  }
+
+  if (Rec->isSubClassOf("CheckIsRegOperand"))
+    return expandCheckIsRegOperand(OS, Rec->getValueAsInt("OpIndex"));
+
+  if (Rec->isSubClassOf("CheckIsImmOperand"))
+    return expandCheckIsImmOperand(OS, Rec->getValueAsInt("OpIndex"));
+
+  if (Rec->isSubClassOf("CheckRegOperand"))
+    return expandCheckRegOperand(OS, Rec->getValueAsInt("OpIndex"),
+                                 Rec->getValueAsDef("Reg"));
+
+  if (Rec->isSubClassOf("CheckInvalidRegOperand"))
+    return expandCheckInvalidRegOperand(OS, Rec->getValueAsInt("OpIndex"));
+
+  if (Rec->isSubClassOf("CheckImmOperand"))
+    return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
+                                 Rec->getValueAsInt("ImmVal"));
+
+  if (Rec->isSubClassOf("CheckImmOperand_s"))
+    return expandCheckImmOperand(OS, Rec->getValueAsInt("OpIndex"),
+                                 Rec->getValueAsString("ImmVal"));
+
+  if (Rec->isSubClassOf("CheckSameRegOperand"))
+    return expandCheckSameRegOperand(OS, Rec->getValueAsInt("FirstIndex"),
+                                     Rec->getValueAsInt("SecondIndex"));
+
+  if (Rec->isSubClassOf("CheckNumOperands"))
+    return expandCheckNumOperands(OS, Rec->getValueAsInt("NumOps"));
+
+  if (Rec->isSubClassOf("CheckPseudo"))
+    return expandCheckPseudo(OS, Rec->getValueAsListOfDefs("ValidOpcodes"));
+
+  if (Rec->isSubClassOf("CheckOpcode"))
+    return expandCheckOpcode(OS, Rec->getValueAsListOfDefs("ValidOpcodes"));
+
+  if (Rec->isSubClassOf("CheckAll"))
+    return expandPredicateSequence(OS, Rec->getValueAsListOfDefs("Predicates"),
+                                   /* AllOf */ true);
+
+  if (Rec->isSubClassOf("CheckAny"))
+    return expandPredicateSequence(OS, Rec->getValueAsListOfDefs("Predicates"),
+                                   /* AllOf */ false);
+
+  if (Rec->isSubClassOf("CheckFunctionPredicate"))
+    return expandCheckFunctionPredicate(
+        OS, Rec->getValueAsString("MCInstFnName"),
+        Rec->getValueAsString("MachineInstrFnName"));
+
+  if (Rec->isSubClassOf("CheckNonPortable"))
+    return expandCheckNonPortable(OS, Rec->getValueAsString("CodeBlock"));
+
+  if (Rec->isSubClassOf("TIIPredicate"))
+    return expandTIIFunctionCall(OS, Rec->getValueAsString("TargetName"),
+                                 Rec->getValueAsString("FunctionName"));
+
+  llvm_unreachable("No known rules to expand this MCInstPredicate");
+}
+
+} // namespace llvm
diff --git a/utils/TableGen/PredicateExpander.h b/utils/TableGen/PredicateExpander.h
new file mode 100644
index 000000000000..398b376f7a83
--- /dev/null
+++ b/utils/TableGen/PredicateExpander.h
@@ -0,0 +1,86 @@
+//===--------------------- PredicateExpander.h ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Functionalities used by the Tablegen backends to expand machine predicates.
+///
+/// See file llvm/Target/TargetInstrPredicate.td for a full list and description
+/// of all the supported MCInstPredicate classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
+#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+class PredicateExpander {
+  bool EmitCallsByRef;
+  bool NegatePredicate;
+  bool ExpandForMC;
+  unsigned IndentLevel;
+
+  PredicateExpander(const PredicateExpander &) = delete;
+  PredicateExpander &operator=(const PredicateExpander &) = delete;
+
+public:
+  PredicateExpander()
+      : EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false),
+        IndentLevel(1U) {}
+  bool isByRef() const { return EmitCallsByRef; }
+  bool shouldNegate() const { return NegatePredicate; }
+  bool shouldExpandForMC() const { return ExpandForMC; }
+  unsigned getIndentLevel() const { return IndentLevel; }
+
+  void setByRef(bool Value) { EmitCallsByRef = Value; }
+  void flipNegatePredicate() { NegatePredicate = !NegatePredicate; }
+  void setNegatePredicate(bool Value) { NegatePredicate = Value; }
+  void setExpandForMC(bool Value) { ExpandForMC = Value; }
+  void increaseIndentLevel() { ++IndentLevel; }
+  void decreaseIndentLevel() { --IndentLevel; }
+  void setIndentLevel(unsigned Level) { IndentLevel = Level; }
+
+  using RecVec = std::vector<Record *>;
+  void expandTrue(formatted_raw_ostream &OS);
+  void expandFalse(formatted_raw_ostream &OS);
+  void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
+                             int ImmVal);
+  void expandCheckImmOperand(formatted_raw_ostream &OS, int OpIndex,
+                             StringRef ImmVal);
+  void expandCheckRegOperand(formatted_raw_ostream &OS, int OpIndex,
+                             const Record *Reg);
+  void expandCheckSameRegOperand(formatted_raw_ostream &OS, int First,
+                                 int Second);
+  void expandCheckNumOperands(formatted_raw_ostream &OS, int NumOps);
+  void expandCheckOpcode(formatted_raw_ostream &OS, const Record *Inst);
+
+  void expandCheckPseudo(formatted_raw_ostream &OS, const RecVec &Opcodes);
+  void expandCheckOpcode(formatted_raw_ostream &OS, const RecVec &Opcodes);
+  void expandPredicateSequence(formatted_raw_ostream &OS,
+                               const RecVec &Sequence, bool IsCheckAll);
+  void expandTIIFunctionCall(formatted_raw_ostream &OS, StringRef TargetName,
+                             StringRef MethodName);
+  void expandCheckIsRegOperand(formatted_raw_ostream &OS, int OpIndex);
+  void expandCheckIsImmOperand(formatted_raw_ostream &OS, int OpIndex);
+  void expandCheckInvalidRegOperand(formatted_raw_ostream &OS, int OpIndex);
+  void expandCheckFunctionPredicate(formatted_raw_ostream &OS,
+                                    StringRef MCInstFn,
+                                    StringRef MachineInstrFn);
+  void expandCheckNonPortable(formatted_raw_ostream &OS, StringRef CodeBlock);
+  void expandPredicate(formatted_raw_ostream &OS, const Record *Rec);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index 63bdd36235a0..a363015730f3 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -120,13 +120,13 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
 }
 
 void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
-  DEBUG(dbgs() << "Pseudo definition: " << Rec->getName() << "\n");
+  LLVM_DEBUG(dbgs() << "Pseudo definition: " << Rec->getName() << "\n");
 
   // Validate that the result pattern has the corrent number and types
   // of arguments for the instruction it references.
   DagInit *Dag = Rec->getValueAsDag("ResultInst");
   assert(Dag && "Missing result instruction in pseudo expansion!");
-  DEBUG(dbgs() << "  Result: " << *Dag << "\n");
+  LLVM_DEBUG(dbgs() << "  Result: " << *Dag << "\n");
 
   DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
   if (!OpDef)
@@ -170,7 +170,7 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
   for (unsigned i = 0, e = SourceInsn.Operands.size(); i != e; ++i)
     SourceOperands[SourceInsn.Operands[i].Name] = i;
 
-  DEBUG(dbgs() << "  Operand mapping:\n");
+  LLVM_DEBUG(dbgs() << "  Operand mapping:\n");
   for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i) {
     // We've already handled constant values. Just map instruction operands
     // here.
@@ -188,7 +188,8 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
       OperandMap[Insn.Operands[i].MIOperandNo + I].Data.Operand =
         SourceOp->getValue();
 
-    DEBUG(dbgs() << "    " << SourceOp->getValue() << " ==> " << i << "\n");
+    LLVM_DEBUG(dbgs() << "    " << SourceOp->getValue() << " ==> " << i
+                      << "\n");
   }
 
   Expansions.push_back(PseudoExpansion(SourceInsn, Insn, OperandMap));
diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp
new file mode 100644
index 000000000000..e03663b40f8a
--- /dev/null
+++ b/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -0,0 +1,810 @@
+//===- RISCVCompressInstEmitter.cpp - Generator for RISCV Compression -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// RISCVCompressInstEmitter implements a tablegen-driven CompressPat based
+// RISCV Instruction Compression mechanism.
+//
+//===--------------------------------------------------------------===//
+//
+// RISCVCompressInstEmitter implements a tablegen-driven CompressPat Instruction
+// Compression mechanism for generating RISCV compressed instructions
+// (C ISA Extension) from the expanded instruction form.
+
+// This tablegen backend processes CompressPat declarations in a
+// td file and generates all the required checks to validate the pattern
+// declarations; validate the input and output operands to generate the correct
+// compressed instructions. The checks include validating  different types of
+// operands; register operands, immediate operands, fixed register and fixed
+// immediate inputs.
+//
+// Example:
+// class CompressPat<dag input, dag output> {
+//   dag Input  = input;
+//   dag Output    = output;
+//   list<Predicate> Predicates = [];
+// }
+//
+// let Predicates = [HasStdExtC] in {
+// def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs1, GPRNoX0:$rs2),
+//                   (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
+// }
+//
+// The result is an auto-generated header file
+// 'RISCVGenCompressInstEmitter.inc' which exports two functions for
+// compressing/uncompressing MCInst instructions, plus
+// some helper functions:
+//
+// bool compressInst(MCInst& OutInst, const MCInst &MI,
+//                   const MCSubtargetInfo &STI,
+//                   MCContext &Context);
+//
+// bool uncompressInst(MCInst& OutInst, const MCInst &MI,
+//                     const MCRegisterInfo &MRI,
+//                     const MCSubtargetInfo &STI);
+//
+// Clients that include this auto-generated header file and
+// invoke these functions can compress an instruction before emitting
+// it in the target-specific ASM or ELF streamer, or can uncompress
+// an instruction before printing it when expanded instruction
+// format aliases are favored.
+
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "compress-inst-emitter"
+
+namespace {
+class RISCVCompressInstEmitter {
+  struct OpData {
+    enum MapKind { Operand, Imm, Reg };
+    MapKind Kind; // Selects which member of Data is meaningful.
+    union {
+      unsigned Operand; // Operand number mapped to.
+      uint64_t Imm;     // Integer immediate value.
+      Record *Reg;      // Physical register.
+    } Data;
+    int TiedOpIdx = -1; // Tied operand index within the instruction; -1: none.
+  };
+  struct CompressPat {
+    CodeGenInstruction Source; // The source instruction definition.
+    CodeGenInstruction Dest;   // The destination instruction to transform to.
+    std::vector<Record *>
+        PatReqFeatures; // Required target features to enable pattern.
+    IndexedMap<OpData>
+        SourceOperandMap; // Maps operands in the Source Instruction to
+                          // the corresponding Dest instruction operand.
+    IndexedMap<OpData>
+        DestOperandMap; // Maps operands in the Dest Instruction
+                        // to the corresponding Source instruction operand.
+    CompressPat(CodeGenInstruction &S, CodeGenInstruction &D,
+                std::vector<Record *> RF, IndexedMap<OpData> &SourceMap,
+                IndexedMap<OpData> &DestMap)
+        : Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap),
+          DestOperandMap(DestMap) {} // Every argument is copied into the pattern.
+  };
+
+  RecordKeeper &Records;
+  CodeGenTarget Target;
+  SmallVector<CompressPat, 4> CompressPatterns; // Filled by evaluateCompressPat.
+
+  void addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
+                            IndexedMap<OpData> &OperandMap, bool IsSourceInst);
+  void evaluateCompressPat(Record *Compress); // Validate and record one pattern.
+  void emitCompressInstEmitter(raw_ostream &o, bool Compress);
+  bool validateTypes(Record *SubType, Record *Type, bool IsSourceInst);
+  bool validateRegister(Record *Reg, Record *RegClass);
+  void createDagOperandMapping(Record *Rec, StringMap<unsigned> &SourceOperands,
+                               StringMap<unsigned> &DestOperands,
+                               DagInit *SourceDag, DagInit *DestDag,
+                               IndexedMap<OpData> &SourceOperandMap);
+
+  void createInstOperandMapping(Record *Rec, DagInit *SourceDag,
+                                DagInit *DestDag,
+                                IndexedMap<OpData> &SourceOperandMap,
+                                IndexedMap<OpData> &DestOperandMap,
+                                StringMap<unsigned> &SourceOperands,
+                                CodeGenInstruction &DestInst);
+
+public:
+  RISCVCompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+
+  void run(raw_ostream &o);
+};
+} // End anonymous namespace.
+
+// Return true if the fixed register Reg is a member of register class RegClass.
+bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
+  assert(Reg->isSubClassOf("Register") && "Reg record should be a Register\n");
+  assert(RegClass->isSubClassOf("RegisterClass") && "RegClass record should be"
+                                                    " a RegisterClass\n");
+  const CodeGenRegisterClass &RC = Target.getRegisterClass(RegClass); // no copy
+  const CodeGenRegister *R = Target.getRegisterByName(Reg->getName().lower());
+  assert(R && "Register not defined!!");
+  return R && RC.contains(R); // Unknown register (NDEBUG): not contained.
+}
+
+// Return true if Dag operand type DagOpType is acceptable for an instruction
+// operand of type InstOpType; subclasses are accepted for source insts only.
+bool RISCVCompressInstEmitter::validateTypes(Record *DagOpType,
+                                             Record *InstOpType,
+                                             bool IsSourceInst) {
+  if (DagOpType == InstOpType)
+    return true;
+  // Only source instruction operands may differ from their Input Dag operands.
+  if (!IsSourceInst)
+    return false;
+
+  const bool DagIsRC = DagOpType->isSubClassOf("RegisterClass");
+  const bool InstIsRC = InstOpType->isSubClassOf("RegisterClass");
+  if (DagIsRC && InstIsRC) {
+    const CodeGenRegisterClass &RC = Target.getRegisterClass(InstOpType);
+    const CodeGenRegisterClass &SubRC = Target.getRegisterClass(DagOpType);
+    return RC.hasSubClass(&SubRC);
+  }
+  // Exactly one of the two types is a register class: reject the pattern.
+  if (DagIsRC || InstIsRC)
+    return false;
+
+  // Neither type is a register class. Let further validation happen when the
+  // compress()/uncompress() functions are invoked.
+  LLVM_DEBUG(dbgs() << (IsSourceInst ? "Input" : "Output")
+                    << " Dag Operand Type: '" << DagOpType->getName()
+                    << "' and "
+                    << "Instruction Operand Type: '" << InstOpType->getName()
+                    << "' can't be checked at pattern validation time!\n");
+  return true;
+}
+
+/// Populate OperandMap with one entry per operand of Inst, taken from Dag.
+/// The Dag may contain register operands (e.g. GPRC:$rs1), fixed registers
+/// (e.g. X1), immediate operands (e.g. simm6:$imm) and fixed immediates
+/// (e.g. 0). A fixed register must belong to the instruction operand's register
+/// class and a typed operand must pass validateTypes(); otherwise a fatal error
+/// is reported. A fixed immediate is rejected only where the instruction
+/// expects a register; its value is not validated at pattern validation time.
+void RISCVCompressInstEmitter::addDagOperandMapping(
+    Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
+    IndexedMap<OpData> &OperandMap, bool IsSourceInst) {
+  // TiedCount keeps track of the number of operands skipped in Inst
+  // operands list to get to the corresponding Dag operand. This is
+  // necessary because the number of operands in Inst might be greater
+  // than number of operands in the Dag due to how tied operands
+  // are represented.
+  unsigned TiedCount = 0;
+  for (unsigned i = 0, e = Inst.Operands.size(); i != e; ++i) {
+    int TiedOpIdx = Inst.Operands[i].getTiedRegister();
+    if (-1 != TiedOpIdx) {
+      // Set the entry in OperandMap for the tied operand we're skipping.
+      OperandMap[i].Kind = OperandMap[TiedOpIdx].Kind;
+      OperandMap[i].Data = OperandMap[TiedOpIdx].Data;
+      TiedCount++;
+      continue;
+    }
+    if (DefInit *DI = dyn_cast<DefInit>(Dag->getArg(i - TiedCount))) {
+      if (DI->getDef()->isSubClassOf("Register")) {
+        // Check if the fixed register belongs to the Register class.
+        if (!validateRegister(DI->getDef(), Inst.Operands[i].Rec))
+          PrintFatalError(Rec->getLoc(),
+                          "Error in Dag '" + Dag->getAsString() +
+                              "'Register: '" + DI->getDef()->getName() +
+                              "' is not in register class '" +
+                              Inst.Operands[i].Rec->getName() + "'");
+        OperandMap[i].Kind = OpData::Reg;
+        OperandMap[i].Data.Reg = DI->getDef();
+        continue;
+      }
+      // Validate that Dag operand type matches the type defined in the
+      // corresponding instruction. Operands in the input Dag pattern are
+      // allowed to be a subclass of the type specified in corresponding
+      // instruction operand instead of being an exact match.
+      if (!validateTypes(DI->getDef(), Inst.Operands[i].Rec, IsSourceInst))
+        PrintFatalError(Rec->getLoc(),
+                        "Error in Dag '" + Dag->getAsString() + "'. Operand '" +
+                            Dag->getArgNameStr(i - TiedCount) + "' has type '" +
+                            DI->getDef()->getName() +
+                            "' which does not match the type '" +
+                            Inst.Operands[i].Rec->getName() +
+                            "' in the corresponding instruction operand!");
+
+      OperandMap[i].Kind = OpData::Operand; // Data.Operand is not set here.
+    } else if (IntInit *II = dyn_cast<IntInit>(Dag->getArg(i - TiedCount))) {
+      // Validate that corresponding instruction operand expects an immediate.
+      if (Inst.Operands[i].Rec->isSubClassOf("RegisterClass"))
+        PrintFatalError(
+            Rec->getLoc(),
+            ("Error in Dag '" + Dag->getAsString() + "' Found immediate: '" +
+             II->getAsString() +
+             "' but corresponding instruction operand expected a register!"));
+      // No pattern validation check possible for values of fixed immediate.
+      OperandMap[i].Kind = OpData::Imm;
+      OperandMap[i].Data.Imm = II->getValue();
+      LLVM_DEBUG(
+          dbgs() << "  Found immediate '" << II->getValue() << "' at "
+                 << (IsSourceInst ? "input " : "output ")
+                 << "Dag. No validation time check possible for values of "
+                    "fixed immediate.\n");
+    } else
+      llvm_unreachable("Unhandled CompressPat argument type!");
+  }
+}
+
+// Check that Dag supplies the right number of operands for Inst. Always
+// returns true; every mismatch is reported through PrintFatalError.
+static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
+                             bool IsSource) {
+  const unsigned NumDagArgs = Dag->getNumArgs();
+  const unsigned NumInstOps = Inst.Operands.size();
+  if (NumDagArgs == NumInstOps)
+    return true;
+  // Source instructions are non compressed instructions without tied
+  // operands, so their operand count has to match exactly.
+  if (IsSource)
+    PrintFatalError("Input operands for Inst '" + Inst.TheDef->getName() +
+                    "' and input Dag operand count mismatch");
+  // A Dag may never carry more arguments than the Instruction has operands.
+  if (NumDagArgs > NumInstOps)
+    PrintFatalError("Inst '" + Inst.TheDef->getName() +
+                    "' and Dag operand count mismatch");
+  // Tied operands are not spelled out in the Dag, so count only untied ones.
+  unsigned UntiedOps = 0;
+  for (unsigned Idx = 0; Idx != NumInstOps; ++Idx)
+    if (Inst.Operands[Idx].getTiedRegister() == -1)
+      ++UntiedOps;
+  if (NumDagArgs != UntiedOps)
+    PrintFatalError("Inst '" + Inst.TheDef->getName() +
+                    "' and Dag operand count mismatch");
+  return true;
+}
+
+static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
+  DefInit *Type1 = dyn_cast<DefInit>(Arg1); // Null unless Arg1 is a DefInit.
+  DefInit *Type2 = dyn_cast<DefInit>(Arg2); // Null unless Arg2 is a DefInit.
+  assert(Type1 && ("Arg1 type not found\n"));
+  assert(Type2 && ("Arg2 type not found\n"));
+  return Type1->getDef() == Type2->getDef(); // True if both name one record.
+}
+
+// Creates a mapping between each operand name in a Dag (e.g. $rs1) and its
+// index in the list of Dag operands, and checks that operands with the same
+// name have the same types. For example in 'C_ADD $rs1, $rs2' we generate the
+// mapping $rs1 --> 0, $rs2 --> 1. If a name appears twice in the same Dag
+// (a tied operand) the last occurrence is used for indexing.
+void RISCVCompressInstEmitter::createDagOperandMapping(
+    Record *Rec, StringMap<unsigned> &SourceOperands,
+    StringMap<unsigned> &DestOperands, DagInit *SourceDag, DagInit *DestDag,
+    IndexedMap<OpData> &SourceOperandMap) {
+  for (unsigned i = 0; i < DestDag->getNumArgs(); ++i) {
+    // Skip fixed immediates and registers, they were handled in
+    // addDagOperandMapping.
+    if ("" == DestDag->getArgNameStr(i))
+      continue;
+    DestOperands[DestDag->getArgNameStr(i)] = i;
+  }
+
+  for (unsigned i = 0; i < SourceDag->getNumArgs(); ++i) {
+    // Skip fixed immediates and registers, they were handled in
+    // addDagOperandMapping.
+    if ("" == SourceDag->getArgNameStr(i))
+      continue;
+
+    StringMap<unsigned>::iterator it =
+        SourceOperands.find(SourceDag->getArgNameStr(i));
+    if (it != SourceOperands.end()) {
+      // Operand sharing the same name in the Dag should be mapped as tied.
+      SourceOperandMap[i].TiedOpIdx = it->getValue();
+      if (!validateArgsTypes(SourceDag->getArg(it->getValue()),
+                             SourceDag->getArg(i)))
+        PrintFatalError(Rec->getLoc(),
+                        "Input Operand '" + SourceDag->getArgNameStr(i) +
+                            "' has a mismatched tied operand!\n");
+    }
+    it = DestOperands.find(SourceDag->getArgNameStr(i)); // Reuse 'it' for Dest.
+    if (it == DestOperands.end())
+      PrintFatalError(Rec->getLoc(), "Operand " + SourceDag->getArgNameStr(i) +
+                                         " defined in Input Dag but not used in"
+                                         " Output Dag!\n");
+    // Input Dag operand types must match output Dag operand type.
+    if (!validateArgsTypes(DestDag->getArg(it->getValue()),
+                           SourceDag->getArg(i)))
+      PrintFatalError(Rec->getLoc(), "Type mismatch between Input and "
+                                     "Output Dag operand '" +
+                                         SourceDag->getArgNameStr(i) + "'!");
+    SourceOperands[SourceDag->getArgNameStr(i)] = i; // Last occurrence wins.
+  }
+}
+
+/// Link each named operand of the destination instruction to the source Dag
+/// operand of the same name, filling Data.Operand in both operand maps. An
+/// output Dag operand without a matching input Dag operand is a fatal error.
+void RISCVCompressInstEmitter::createInstOperandMapping(
+    Record *Rec, DagInit *SourceDag, DagInit *DestDag,
+    IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap,
+    StringMap<unsigned> &SourceOperands, CodeGenInstruction &DestInst) {
+  // TiedCount keeps track of the number of operands skipped in Inst
+  // operands list to get to the corresponding Dag operand.
+  unsigned TiedCount = 0;
+  LLVM_DEBUG(dbgs() << "  Operand mapping:\n  Source   Dest\n");
+  for (unsigned i = 0, e = DestInst.Operands.size(); i != e; ++i) {
+    int TiedInstOpIdx = DestInst.Operands[i].getTiedRegister();
+    if (TiedInstOpIdx != -1) {
+      ++TiedCount;
+      DestOperandMap[i].Data = DestOperandMap[TiedInstOpIdx].Data;
+      DestOperandMap[i].Kind = DestOperandMap[TiedInstOpIdx].Kind;
+      if (DestOperandMap[i].Kind == OpData::Operand)
+        // No need to fill the SourceOperandMap here since it was mapped to
+        // destination operand 'TiedInstOpIdx' in a previous iteration.
+        LLVM_DEBUG(dbgs() << "    " << DestOperandMap[i].Data.Operand
+                          << " ====> " << i
+                          << "  Dest operand tied with operand '"
+                          << TiedInstOpIdx << "'\n");
+      continue;
+    }
+    // Skip fixed immediates and registers, they were handled in
+    // addDagOperandMapping.
+    if (DestOperandMap[i].Kind != OpData::Operand)
+      continue;
+
+    unsigned DagArgIdx = i - TiedCount; // Dag index of instruction operand i.
+    StringMap<unsigned>::iterator SourceOp =
+        SourceOperands.find(DestDag->getArgNameStr(DagArgIdx));
+    if (SourceOp == SourceOperands.end())
+      PrintFatalError(Rec->getLoc(),
+                      "Output Dag operand '" +
+                          DestDag->getArgNameStr(DagArgIdx) +
+                          "' has no matching input Dag operand.");
+
+    assert(DestDag->getArgNameStr(DagArgIdx) ==
+               SourceDag->getArgNameStr(SourceOp->getValue()) &&
+           "Incorrect operand mapping detected!\n");
+    DestOperandMap[i].Data.Operand = SourceOp->getValue(); // Dest -> Source.
+    SourceOperandMap[SourceOp->getValue()].Data.Operand = i; // Source -> Dest.
+    LLVM_DEBUG(dbgs() << "    " << SourceOp->getValue() << " ====> " << i
+                      << "\n");
+  }
+}
+
+/// Validates a CompressPat declaration and creates its operand mapping.
+/// The following requirements are checked; violating any of them is a fatal
+/// error:
+/// - Dag Input opcode is an expanded instruction and Dag Output opcode is a
+///   compressed instruction.
+/// - Operands in Dag Input must all be used in Dag Output.
+/// - Register Operand type in Dag Input must be contained in the
+///   corresponding Source Instruction type.
+/// - Register Operand type in Dag Input must be the same as in Dag Output.
+/// - Register Operand type in Dag Output must be the same as the
+///   corresponding Destination Inst type.
+/// - Immediate Operand type in Dag Input must be the same as in Dag Output.
+/// - Immediate Operand type in Dag Output must be the same as the corresponding
+///   Destination Instruction type.
+/// - Fixed register must be contained in the corresponding Source Instruction
+///   type.
+/// - Fixed register must be contained in the corresponding Destination
+///   Instruction type.
+/// A warning message is printed under these conditions:
+/// - Fixed immediate in Dag Input or Dag Output cannot be checked at this time.
+/// - Immediate operand type in Dag Input differs from the corresponding Source
+///   Instruction type.
+void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
+  // Validate input Dag operands.
+  DagInit *SourceDag = Rec->getValueAsDag("Input");
+  assert(SourceDag && "Missing 'Input' in compress pattern!");
+  LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n");
+
+  DefInit *OpDef = dyn_cast<DefInit>(SourceDag->getOperator());
+  if (!OpDef)
+    PrintFatalError(Rec->getLoc(),
+                    Rec->getName() + " has unexpected operator type!");
+  // The Input must be an uncompressed (32 bit wide, 'RVInst') instruction.
+  Record *Operator = OpDef->getDef();
+  if (!Operator->isSubClassOf("RVInst"))
+    PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() +
+                                       "' is not a 32 bit wide instruction!");
+  CodeGenInstruction SourceInst(Operator);
+  verifyDagOpCount(SourceInst, SourceDag, true);
+
+  // Validate output Dag operands.
+  DagInit *DestDag = Rec->getValueAsDag("Output");
+  assert(DestDag && "Missing 'Output' in compress pattern!");
+  LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n");
+
+  DefInit *DestOpDef = dyn_cast<DefInit>(DestDag->getOperator());
+  if (!DestOpDef)
+    PrintFatalError(Rec->getLoc(),
+                    Rec->getName() + " has unexpected operator type!");
+  // The Output must be a compressed (16 bit wide, 'RVInst16') instruction.
+  Record *DestOperator = DestOpDef->getDef();
+  if (!DestOperator->isSubClassOf("RVInst16"))
+    PrintFatalError(Rec->getLoc(), "Output instruction  '" +
+                                       DestOperator->getName() +
+                                       "' is not a 16 bit wide instruction!");
+  CodeGenInstruction DestInst(DestOperator);
+  verifyDagOpCount(DestInst, DestDag, false);
+
+  // Fill the mapping from the source to destination instructions.
+
+  IndexedMap<OpData> SourceOperandMap;
+  SourceOperandMap.grow(SourceInst.Operands.size());
+  // Create a mapping between source Dag operands and source Inst operands.
+  addDagOperandMapping(Rec, SourceDag, SourceInst, SourceOperandMap,
+                       /*IsSourceInst*/ true);
+
+  IndexedMap<OpData> DestOperandMap;
+  DestOperandMap.grow(DestInst.Operands.size());
+  // Create a mapping between destination Dag operands and destination Inst
+  // operands.
+  addDagOperandMapping(Rec, DestDag, DestInst, DestOperandMap,
+                       /*IsSourceInst*/ false);
+
+  StringMap<unsigned> SourceOperands; // Input Dag operand name -> Dag index.
+  StringMap<unsigned> DestOperands;   // Output Dag operand name -> Dag index.
+  createDagOperandMapping(Rec, SourceOperands, DestOperands, SourceDag, DestDag,
+                          SourceOperandMap);
+  // Create operand mapping between the source and destination instructions.
+  createInstOperandMapping(Rec, SourceDag, DestDag, SourceOperandMap,
+                           DestOperandMap, SourceOperands, DestInst);
+
+  // Get the target features for the CompressPat: keep only the predicates
+  // whose 'AssemblerMatcherPredicate' bit is set.
+  std::vector<Record *> PatReqFeatures;
+  std::vector<Record *> RF = Rec->getValueAsListOfDefs("Predicates");
+  copy_if(RF, std::back_inserter(PatReqFeatures), [](Record *R) {
+    return R->getValueAsBit("AssemblerMatcherPredicate");
+  });
+
+  CompressPatterns.push_back(CompressPat(SourceInst, DestInst, PatReqFeatures,
+                                         SourceOperandMap, DestOperandMap));
+}
+
+// Add every feature named in the records' AssemblerCondString to FeaturesMap.
+static void getReqFeatures(std::map<StringRef, int> &FeaturesMap,
+                           const std::vector<Record *> &ReqFeatures) {
+  for (Record *R : ReqFeatures) {
+    // AsmCondString has syntax [!]F(,[!]F)*
+    StringRef AsmCondString = R->getValueAsString("AssemblerCondString");
+    SmallVector<StringRef, 4> Ops;
+    SplitString(AsmCondString, Ops, ",");
+    assert(!Ops.empty() && "AssemblerCondString cannot be empty");
+    for (StringRef Op : Ops) {
+      assert(!Op.empty() && "Empty operator");
+      // emplace() keeps an existing entry and reads size() before inserting,
+      // so the stored index does not depend on operand evaluation order.
+      FeaturesMap.emplace(Op, static_cast<int>(FeaturesMap.size()));
+    }
+  }
+}
+
+// Return the 1-based index of Rec in MCOpPredicates, appending Rec on first
+// use (0 is the map's "not seen yet" default). An operand whose
+// MCOperandPredicate is unset is a fatal error.
+unsigned getMCOpPredicate(DenseMap<const Record *, unsigned> &MCOpPredicateMap,
+                          std::vector<const Record *> &MCOpPredicates,
+                          Record *Rec) {
+  unsigned Entry = MCOpPredicateMap[Rec];
+  if (Entry)
+    return Entry;
+  if (!Rec->isValueUnset("MCOperandPredicate")) {
+    MCOpPredicates.push_back(Rec);
+    Entry = MCOpPredicates.size();
+    MCOpPredicateMap[Rec] = Entry;
+    return Entry;
+  }
+  PrintFatalError(Rec->getLoc(),
+                  "No MCOperandPredicate on this operand at all: '" +
+                      Rec->getName().str() + "'");
+  return 0;
+}
+
+// Combine the checks in CondStream and the statements in CodeStream into one
+// emitted "if (<checks>) { <statements> return true; }" block.
+static std::string mergeCondAndCode(raw_string_ostream &CondStream,
+                                    raw_string_ostream &CodeStream) {
+  // Remove the first indentation (6 chars) and the last '&&' (4 chars).
+  const std::string &Cond = CondStream.str();
+  const std::string Checks = Cond.substr(6, Cond.length() - 10);
+  std::string Merged;
+  raw_string_ostream OS(Merged);
+  OS.indent(4) << "if (" << Checks << ") {\n";
+  OS << CodeStream.str();
+  OS.indent(4) << "  return true;\n";
+  OS.indent(4) << "} // if\n";
+  return OS.str();
+}
+
+void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
+                                                       bool Compress) {
+  Record *AsmWriter = Target.getAsmWriter();
+  if (!AsmWriter->getValueAsInt("PassSubtarget"))
+    PrintFatalError("'PassSubtarget' is false. SubTargetInfo object is needed "
+                    "for target features.\n");
+
+  std::string Namespace = Target.getName();
+
+  // Sort entries in CompressPatterns to handle instructions that can have more
+  // than one candidate for compression\uncompression, e.g ADD can be
+  // transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the
+  // source and destination are flipped and the sort key needs to change
+  // accordingly.
+  std::stable_sort(CompressPatterns.begin(), CompressPatterns.end(),
+                   [Compress](const CompressPat &LHS, const CompressPat &RHS) {
+                     if (Compress)
+                       return (LHS.Source.TheDef->getName().str() <
+                               RHS.Source.TheDef->getName().str());
+                     else
+                       return (LHS.Dest.TheDef->getName().str() <
+                               RHS.Dest.TheDef->getName().str());
+                   });
+
+  // A list of MCOperandPredicates for all operands in use, and the reverse map.
+  std::vector<const Record *> MCOpPredicates;
+  DenseMap<const Record *, unsigned> MCOpPredicateMap;
+
+  std::string F;
+  std::string FH;
+  raw_string_ostream Func(F);
+  raw_string_ostream FuncH(FH);
+  bool NeedMRI = false;
+
+  if (Compress)
+    o << "\n#ifdef GEN_COMPRESS_INSTR\n"
+      << "#undef GEN_COMPRESS_INSTR\n\n";
+  else
+    o << "\n#ifdef GEN_UNCOMPRESS_INSTR\n"
+      << "#undef GEN_UNCOMPRESS_INSTR\n\n";
+
+  if (Compress) {
+    FuncH << "static bool compressInst(MCInst& OutInst,\n";
+    FuncH.indent(25) << "const MCInst &MI,\n";
+    FuncH.indent(25) << "const MCSubtargetInfo &STI,\n";
+    FuncH.indent(25) << "MCContext &Context) {\n";
+  } else {
+    FuncH << "static bool uncompressInst(MCInst& OutInst,\n";
+    FuncH.indent(27) << "const MCInst &MI,\n";
+    FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
+    FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
+  }
+
+  if (CompressPatterns.empty()) {
+    o << FuncH.str();
+    o.indent(2) << "return false;\n}\n";
+    if (Compress)
+      o << "\n#endif //GEN_COMPRESS_INSTR\n";
+    else
+      o << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n";
+    return;
+  }
+
+  std::string CaseString("");
+  raw_string_ostream CaseStream(CaseString);
+  std::string PrevOp("");
+  std::string CurOp("");
+  CaseStream << "  switch (MI.getOpcode()) {\n";
+  CaseStream << "    default: return false;\n";
+
+  for (auto &CompressPat : CompressPatterns) {
+    std::string CondString;
+    std::string CodeString;
+    raw_string_ostream CondStream(CondString);
+    raw_string_ostream CodeStream(CodeString);
+    CodeGenInstruction &Source =
+        Compress ? CompressPat.Source : CompressPat.Dest;
+    CodeGenInstruction &Dest = Compress ? CompressPat.Dest : CompressPat.Source;
+    IndexedMap<OpData> SourceOperandMap =
+        Compress ? CompressPat.SourceOperandMap : CompressPat.DestOperandMap;
+    IndexedMap<OpData> &DestOperandMap =
+        Compress ? CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
+
+    CurOp = Source.TheDef->getName().str();
+    // Check current and previous opcode to decide to continue or end a case.
+    if (CurOp != PrevOp) {
+      if (PrevOp != "")
+        CaseStream.indent(6) << "break;\n    } // case " + PrevOp + "\n";
+      CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
+    }
+
+    std::map<StringRef, int> FeaturesMap;
+    // Add CompressPat required features.
+    getReqFeatures(FeaturesMap, CompressPat.PatReqFeatures);
+
+    // Add Dest instruction required features.
+    std::vector<Record *> ReqFeatures;
+    std::vector<Record *> RF = Dest.TheDef->getValueAsListOfDefs("Predicates");
+    copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
+      return R->getValueAsBit("AssemblerMatcherPredicate");
+    });
+    getReqFeatures(FeaturesMap, ReqFeatures);
+
+    // Emit checks for all required features.
+    for (auto &F : FeaturesMap) {
+      StringRef Op = F.first;
+      if (Op[0] == '!')
+        CondStream.indent(6) << ("!STI.getFeatureBits()[" + Namespace +
+                                 "::" + Op.substr(1) + "]")
+                                        .str() +
+                                    " &&\n";
+      else
+        CondStream.indent(6)
+            << ("STI.getFeatureBits()[" + Namespace + "::" + Op + "]").str() +
+                   " &&\n";
+    }
+
+    // Start Source Inst operands validation.
+    unsigned OpNo = 0;
+    for (OpNo = 0; OpNo < Source.Operands.size(); ++OpNo) {
+      if (SourceOperandMap[OpNo].TiedOpIdx != -1) {
+        if (Source.Operands[OpNo].Rec->isSubClassOf("RegisterClass"))
+          CondStream.indent(6)
+              << "(MI.getOperand("
+              << std::to_string(OpNo) + ").getReg() ==  MI.getOperand("
+              << std::to_string(SourceOperandMap[OpNo].TiedOpIdx)
+              << ").getReg()) &&\n";
+        else
+          PrintFatalError("Unexpected tied operand types!\n");
+      }
+      // Check for fixed immediates\registers in the source instruction.
+      switch (SourceOperandMap[OpNo].Kind) {
+      case OpData::Operand:
+        // We don't need to do anything for source instruction operand checks.
+        break;
+      case OpData::Imm:
+        CondStream.indent(6)
+            << "(MI.getOperand(" + std::to_string(OpNo) + ").isImm()) &&\n" +
+                   "      (MI.getOperand(" + std::to_string(OpNo) +
+                   ").getImm() == " +
+                   std::to_string(SourceOperandMap[OpNo].Data.Imm) + ") &&\n";
+        break;
+      case OpData::Reg: {
+        Record *Reg = SourceOperandMap[OpNo].Data.Reg;
+        CondStream.indent(6) << "(MI.getOperand(" + std::to_string(OpNo) +
+                                    ").getReg() == " + Namespace +
+                                    "::" + Reg->getName().str() + ") &&\n";
+        break;
+      }
+      }
+    }
+    CodeStream.indent(6) << "// " + Dest.AsmString + "\n";
+    CodeStream.indent(6) << "OutInst.setOpcode(" + Namespace +
+                                "::" + Dest.TheDef->getName().str() + ");\n";
+    OpNo = 0;
+    for (const auto &DestOperand : Dest.Operands) {
+      CodeStream.indent(6) << "// Operand: " + DestOperand.Name + "\n";
+      switch (DestOperandMap[OpNo].Kind) {
+      case OpData::Operand: {
+        unsigned OpIdx = DestOperandMap[OpNo].Data.Operand;
+        // Check that the operand in the Source instruction fits
+        // the type for the Dest instruction.
+        if (DestOperand.Rec->isSubClassOf("RegisterClass")) {
+          NeedMRI = true;
+          // This is a register operand. Check the register class.
+          // Don't check register class if this is a tied operand, it was done
+          // for the operand its tied to.
+          if (DestOperand.getTiedRegister() == -1)
+            CondStream.indent(6)
+                << "(MRI.getRegClass(" + Namespace +
+                       "::" + DestOperand.Rec->getName().str() +
+                       "RegClassID).contains(" + "MI.getOperand(" +
+                       std::to_string(OpIdx) + ").getReg())) &&\n";
+
+          CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
+                                      std::to_string(OpIdx) + "));\n";
+        } else {
+          // Handling immediate operands.
+          unsigned Entry = getMCOpPredicate(MCOpPredicateMap, MCOpPredicates,
+                                            DestOperand.Rec);
+          CondStream.indent(6) << Namespace + "ValidateMCOperand(" +
+                                      "MI.getOperand(" + std::to_string(OpIdx) +
+                                      "), STI, " + std::to_string(Entry) +
+                                      ") &&\n";
+          CodeStream.indent(6) << "OutInst.addOperand(MI.getOperand(" +
+                                      std::to_string(OpIdx) + "));\n";
+        }
+        break;
+      }
+      case OpData::Imm: {
+        unsigned Entry =
+            getMCOpPredicate(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec);
+        CondStream.indent(6)
+            << Namespace + "ValidateMCOperand(" + "MCOperand::createImm(" +
+                   std::to_string(DestOperandMap[OpNo].Data.Imm) + "), STI, " +
+                   std::to_string(Entry) + ") &&\n";
+        CodeStream.indent(6)
+            << "OutInst.addOperand(MCOperand::createImm(" +
+                   std::to_string(DestOperandMap[OpNo].Data.Imm) + "));\n";
+      } break;
+      case OpData::Reg: {
+        // Fixed register has been validated at pattern validation time.
+        Record *Reg = DestOperandMap[OpNo].Data.Reg;
+        CodeStream.indent(6) << "OutInst.addOperand(MCOperand::createReg(" +
+                                    Namespace + "::" + Reg->getName().str() +
+                                    "));\n";
+      } break;
+      }
+      ++OpNo;
+    }
+    CaseStream << mergeCondAndCode(CondStream, CodeStream);
+    PrevOp = CurOp;
+  }
+  Func << CaseStream.str() << "\n";
+  // Close brace for the last case.
+  Func.indent(4) << "} // case " + CurOp + "\n";
+  Func.indent(2) << "} // switch\n";
+  Func.indent(2) << "return false;\n}\n";
+
+  if (!MCOpPredicates.empty()) {
+    o << "static bool " << Namespace
+      << "ValidateMCOperand(const MCOperand &MCOp,\n"
+      << "                  const MCSubtargetInfo &STI,\n"
+      << "                  unsigned PredicateIndex) {\n"
+      << "  switch (PredicateIndex) {\n"
+      << "  default:\n"
+      << "    llvm_unreachable(\"Unknown MCOperandPredicate kind\");\n"
+      << "    break;\n";
+
+    for (unsigned i = 0; i < MCOpPredicates.size(); ++i) {
+      Init *MCOpPred = MCOpPredicates[i]->getValueInit("MCOperandPredicate");
+      if (CodeInit *SI = dyn_cast<CodeInit>(MCOpPred))
+        o << "  case " << i + 1 << ": {\n"
+          << "   // " << MCOpPredicates[i]->getName().str() << SI->getValue()
+          << "\n"
+          << "    }\n";
+      else
+        llvm_unreachable("Unexpected MCOperandPredicate field!");
+    }
+    o << "  }\n"
+      << "}\n\n";
+  }
+
+  o << FuncH.str();
+  if (NeedMRI && Compress)
+    o.indent(2) << "const MCRegisterInfo &MRI = *Context.getRegisterInfo();\n";
+  o << Func.str();
+
+  if (Compress)
+    o << "\n#endif //GEN_COMPRESS_INSTR\n";
+  else
+    o << "\n#endif //GEN_UNCOMPRESS_INSTR\n\n";
+}
+
+/// Driver for this backend: collects every record derived from CompressPat,
+/// evaluates each one, then writes the file header followed by the
+/// compressInst() and uncompressInst() functions to \p o.
+void RISCVCompressInstEmitter::run(raw_ostream &o) {
+  // getAllDerivedDefinitions() diagnoses a missing CompressPat class with a
+  // fatal error, so the check also fires in builds without assertions.
+  std::vector<Record *> Insts =
+      Records.getAllDerivedDefinitions("CompressPat");
+
+  // Process the CompressPat definitions, validating them as we do so.
+  for (Record *Inst : Insts)
+    evaluateCompressPat(Inst);
+
+  // Emit file header.
+  emitSourceFileHeader("Compress instruction Source Fragment", o);
+  // Generate compressInst() function.
+  emitCompressInstEmitter(o, true);
+  // Generate uncompressInst() function.
+  emitCompressInstEmitter(o, false);
+}
+
+namespace llvm {
+/// Run the compress-instruction emitter over \p RK, writing to \p OS.
+void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS) {
+  RISCVCompressInstEmitter Emitter(RK);
+  Emitter.run(OS);
+}
+} // namespace llvm
diff --git a/utils/TableGen/RegisterBankEmitter.cpp b/utils/TableGen/RegisterBankEmitter.cpp
index 5c6471688044..879b4162d629 100644
--- a/utils/TableGen/RegisterBankEmitter.cpp
+++ b/utils/TableGen/RegisterBankEmitter.cpp
@@ -291,9 +291,11 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
       visitRegisterBankClasses(
           RegisterClassHierarchy, RC, "explicit",
           [&Bank](const CodeGenRegisterClass *RC, StringRef Kind) {
-            DEBUG(dbgs() << "Added " << RC->getName() << "(" << Kind << ")\n");
+            LLVM_DEBUG(dbgs()
+                       << "Added " << RC->getName() << "(" << Kind << ")\n");
             Bank.addRegisterClass(RC);
-          }, VisitedRCs);
+          },
+          VisitedRCs);
     }
 
     Banks.push_back(Bank);
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 7eef2337c140..49016cca799e 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -15,19 +15,19 @@
 
 #include "CodeGenRegisters.h"
 #include "CodeGenTarget.h"
-#include "Types.h"
 #include "SequenceToOffsetTable.h"
+#include "Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -203,11 +203,11 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
      << "  static const RegClassWeight RCWeightTable[] = {\n";
   for (const auto &RC : RegBank.getRegClasses()) {
     const CodeGenRegister::Vec &Regs = RC.getMembers();
-    if (Regs.empty())
+    if (Regs.empty() || RC.Artificial)
       OS << "    {0, 0";
     else {
       std::vector<unsigned> RegUnits;
-      RC.buildRegUnitSet(RegUnits);
+      RC.buildRegUnitSet(RegBank, RegUnits);
       OS << "    {" << (*Regs.begin())->getWeight(RegBank)
          << ", " << RegBank.getRegUnitSetWeight(RegUnits);
     }
@@ -296,7 +296,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
            PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
       PSets[i].push_back(RegBank.getRegPressureSet(*PSetI).Order);
     }
-    std::sort(PSets[i].begin(), PSets[i].end());
+    llvm::sort(PSets[i].begin(), PSets[i].end());
     PSetsSeqs.add(PSets[i]);
   }
 
diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp
index 63252e8c0391..664de2217e94 100644
--- a/utils/TableGen/SearchableTableEmitter.cpp
+++ b/utils/TableGen/SearchableTableEmitter.cpp
@@ -13,23 +13,85 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "CodeGenIntrinsics.h"
 #include <algorithm>
+#include <set>
 #include <string>
 #include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "searchable-table-emitter"
 
 namespace {
 
+struct GenericTable;
+
+int64_t getAsInt(Init *B) { // 64-bit: bits<64> fields must not be truncated.
+  return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
+}
+int64_t getInt(Record *R, StringRef Field) { // Integer value of R's Field.
+  return getAsInt(R->getValueInit(Field));
+}
+
+struct GenericEnum { // Description of one enum to be emitted.
+  using Entry = std::pair<StringRef, int64_t>; // (enumerator name, value)
+
+  std::string Name; // Emitted right after the 'enum' keyword.
+  Record *Class = nullptr; // Builds the RecTy of enum-typed fields; null until set.
+  std::string PreprocessorGuard; // Emitted as GET_<guard>_DECL.
+  std::vector<std::unique_ptr<Entry>> Entries; // Emitted in this order.
+  DenseMap<Record *, Entry *> EntryMap; // Def record -> entry (non-owning).
+};
+
+struct GenericField {
+  std::string Name; // Record field name; also the identifier in emitted code.
+  RecTy *RecType = nullptr;
+  bool IsIntrinsic = false; // Value is emitted as Intrinsic::<EnumName>.
+  bool IsInstruction = false; // Value is emitted via Init::getAsString().
+  GenericEnum *Enum = nullptr; // Set when the field's type is a GenericEnum.
+
+  GenericField(StringRef Name) : Name(Name) {}
+};
+
+struct SearchIndex { // One lookup key over a table.
+  std::string Name; // Emitted as the lookup function's name.
+  SmallVector<GenericField, 1> Fields; // Key fields, compared in this order.
+  bool EarlyOut = false; // Emit a first/last range check before searching.
+};
+
+struct GenericTable {
+  std::string Name; // Identifier of the emitted data array.
+  std::string PreprocessorGuard; // Emitted as GET_<guard>_DECL / _IMPL.
+  std::string CppTypeName; // Element type of the emitted array.
+  SmallVector<GenericField, 2> Fields; // Columns, in emission order.
+  std::vector<Record *> Entries; // Rows, in emission order.
+
+  std::unique_ptr<SearchIndex> PrimaryKey; // May be null.
+  SmallVector<std::unique_ptr<SearchIndex>, 2> Indices; // Further indices.
+
+  /// Linear search over Fields for the first one named \p Name.
+  /// \returns a pointer to that field, or nullptr when there is none.
+  const GenericField *getFieldByName(StringRef Name) const {
+    auto Match = [&](const GenericField &F) { return Name == F.Name; };
+    auto It = std::find_if(Fields.begin(), Fields.end(), Match);
+    return It != Fields.end() ? &*It : nullptr;
+  }
+};
+
 class SearchableTableEmitter {
   RecordKeeper &Records;
+  DenseMap<Init *, std::unique_ptr<CodeGenIntrinsic>> Intrinsics;
+  std::vector<std::unique_ptr<GenericEnum>> Enums;
+  DenseMap<Record *, GenericEnum *> EnumMap;
+  std::set<std::string> PreprocessorGuards;
 
 public:
   SearchableTableEmitter(RecordKeeper &R) : Records(R) {}
@@ -39,38 +101,58 @@ public:
 private:
   typedef std::pair<Init *, int> SearchTableEntry;
 
-  int getAsInt(BitsInit *B) {
-    return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
-  }
-  int getInt(Record *R, StringRef Field) {
-    return getAsInt(R->getValueAsBitsInit(Field));
-  }
+  enum TypeContext { // Where a field's C++ type is being spelled out.
+    TypeInStaticStruct, // String fields become "const char *".
+    TypeInTempStruct, // String fields become "std::string".
+    TypeInArgument, // String fields become "StringRef".
+  };
 
-  std::string primaryRepresentation(Init *I) {
+  std::string primaryRepresentation(const GenericField &Field, Init *I) {
     if (StringInit *SI = dyn_cast<StringInit>(I))
       return SI->getAsString();
     else if (BitsInit *BI = dyn_cast<BitsInit>(I))
       return "0x" + utohexstr(getAsInt(BI));
     else if (BitInit *BI = dyn_cast<BitInit>(I))
       return BI->getValue() ? "true" : "false";
-    else if (CodeInit *CI = dyn_cast<CodeInit>(I)) {
+    else if (CodeInit *CI = dyn_cast<CodeInit>(I))
       return CI->getValue();
-    }
-    PrintFatalError(SMLoc(),
-                    "invalid field type, expected: string, bits, bit or code");
+    else if (Field.IsIntrinsic) // Spelled as the intrinsic's enumerator.
+      return "Intrinsic::" + getIntrinsic(I).EnumName;
+    else if (Field.IsInstruction) // Spelled as the Init's string form.
+      return I->getAsString();
+    else if (Field.Enum) // Spelled as the entry name recorded for the def.
+      return Field.Enum->EntryMap[cast<DefInit>(I)->getDef()]->first;
+    PrintFatalError(Twine("invalid field type for field '") + Field.Name +
+                    "', expected: string, bits, bit or code");
+  }
+
+  // True iff I is a def whose record derives from the "Intrinsic" class.
+  bool isIntrinsic(Init *I) {
+    auto *Def = dyn_cast<DefInit>(I);
+    return Def && Def->getDef()->isSubClassOf("Intrinsic");
+  }
+
+  CodeGenIntrinsic &getIntrinsic(Init *I) { // Built on first use, then cached.
+    std::unique_ptr<CodeGenIntrinsic> &Intr = Intrinsics[I];
+    if (!Intr)
+      Intr = make_unique<CodeGenIntrinsic>(cast<DefInit>(I)->getDef());
+    return *Intr;
   }
 
-  std::string searchRepresentation(Init *I) {
-    std::string PrimaryRep = primaryRepresentation(I);
-    if (!isa<StringInit>(I))
-      return PrimaryRep;
-    return StringRef(PrimaryRep).upper();
+  bool compareBy(Record *LHS, Record *RHS, const SearchIndex &Index);
+
+  bool isIntegral(Init *I) { // True for a BitsInit or an intrinsic def.
+    return isa<BitsInit>(I) || isIntrinsic(I);
   }
 
-  std::string searchableFieldType(Init *I) {
-    if (isa<StringInit>(I))
-      return "const char *";
-    else if (BitsInit *BI = dyn_cast<BitsInit>(I)) {
+  std::string searchableFieldType(const GenericField &Field, TypeContext Ctx) {
+    if (isa<StringRecTy>(Field.RecType)) {
+      if (Ctx == TypeInStaticStruct)
+        return "const char *";
+      if (Ctx == TypeInTempStruct)
+        return "std::string";
+      return "StringRef";
+    } else if (BitsRecTy *BI = dyn_cast<BitsRecTy>(Field.RecType)) {
       unsigned NumBits = BI->getNumBits();
       if (NumBits <= 8)
         NumBits = 8;
@@ -81,233 +163,617 @@ private:
       else if (NumBits <= 64)
         NumBits = 64;
       else
-        PrintFatalError(SMLoc(), "bitfield too large to search");
+        PrintFatalError(Twine("bitfield '") + Field.Name +
+                        "' too large to search");
       return "uint" + utostr(NumBits) + "_t";
-    }
-    PrintFatalError(SMLoc(), "Unknown type to search by");
+    } else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction)
+      return "unsigned";
+    PrintFatalError(Twine("Field '") + Field.Name + "' has unknown type '" +
+                    Field.RecType->getAsString() + "' to search by");
   }
 
-  void emitMapping(Record *MappingDesc, raw_ostream &OS);
-  void emitMappingEnum(std::vector<Record *> &Items, Record *InstanceClass,
-                       raw_ostream &OS);
-  void
-  emitPrimaryTable(StringRef Name, std::vector<std::string> &FieldNames,
-                   std::vector<std::string> &SearchFieldNames,
-                   std::vector<std::vector<SearchTableEntry>> &SearchTables,
-                   std::vector<Record *> &Items, raw_ostream &OS);
-  void emitSearchTable(StringRef Name, StringRef Field,
-                       std::vector<SearchTableEntry> &SearchTable,
-                       raw_ostream &OS);
-  void emitLookupDeclaration(StringRef Name, StringRef Field, Init *I,
-                             raw_ostream &OS);
-  void emitLookupFunction(StringRef Name, StringRef Field, Init *I,
-                          raw_ostream &OS);
+  void emitGenericTable(const GenericTable &Table, raw_ostream &OS);
+  void emitGenericEnum(const GenericEnum &Enum, raw_ostream &OS);
+  void emitLookupDeclaration(const GenericTable &Table,
+                             const SearchIndex &Index, raw_ostream &OS);
+  void emitLookupFunction(const GenericTable &Table, const SearchIndex &Index,
+                          bool IsPrimary, raw_ostream &OS);
+  void emitIfdef(StringRef Guard, raw_ostream &OS);
+
+  bool parseFieldType(GenericField &Field, Init *II);
+  std::unique_ptr<SearchIndex>
+  parseSearchIndex(GenericTable &Table, StringRef Name,
+                   const std::vector<StringRef> &Key, bool EarlyOut);
+  void collectEnumEntries(GenericEnum &Enum, StringRef NameField,
+                          StringRef ValueField,
+                          const std::vector<Record *> &Items);
+  void collectTableEntries(GenericTable &Table,
+                           const std::vector<Record *> &Items);
 };
 
 } // End anonymous namespace.
 
-/// Emit an enum providing symbolic access to some preferred field from
-/// C++.
-void SearchableTableEmitter::emitMappingEnum(std::vector<Record *> &Items,
-                                             Record *InstanceClass,
-                                             raw_ostream &OS) {
-  StringRef EnumNameField = InstanceClass->getValueAsString("EnumNameField");
-  StringRef EnumValueField;
-  if (!InstanceClass->isValueUnset("EnumValueField"))
-    EnumValueField = InstanceClass->getValueAsString("EnumValueField");
-
-  OS << "enum " << InstanceClass->getName() << "Values {\n";
-  for (auto Item : Items) {
-    OS << "  " << Item->getValueAsString(EnumNameField);
-    if (EnumValueField != StringRef())
-      OS << " = " << getInt(Item, EnumValueField);
-    OS << ",\n";
+// For search indices that consist of a single field whose numeric value is
+// known, return that numeric value.
+static int64_t getNumericKey(const SearchIndex &Index, Record *Rec) {
+  assert(Index.Fields.size() == 1);
+
+  if (Index.Fields[0].Enum) { // Enum key: value recorded for the named def.
+    Record *EnumEntry = Rec->getValueAsDef(Index.Fields[0].Name);
+    return Index.Fields[0].Enum->EntryMap[EnumEntry]->second;
   }
-  OS << "};\n\n";
-}
 
-void SearchableTableEmitter::emitPrimaryTable(
-    StringRef Name, std::vector<std::string> &FieldNames,
-    std::vector<std::string> &SearchFieldNames,
-    std::vector<std::vector<SearchTableEntry>> &SearchTables,
-    std::vector<Record *> &Items, raw_ostream &OS) {
-  OS << "const " << Name << " " << Name << "sList[] = {\n";
+  return getInt(Rec, Index.Fields[0].Name);
+}
 
-  for (auto Item : Items) {
-    OS << "  { ";
-    for (unsigned i = 0; i < FieldNames.size(); ++i) {
-      OS << primaryRepresentation(Item->getValueInit(FieldNames[i]));
-      if (i != FieldNames.size() - 1)
-        OS << ", ";
+/// Less-than comparison of \p LHS and \p RHS by the key of \p Index. Fields
+/// are compared in order; the first unequal one decides. Equal keys: false.
+bool SearchableTableEmitter::compareBy(Record *LHS, Record *RHS,
+                                       const SearchIndex &Index) {
+  for (const auto &Field : Index.Fields) {
+    Init *LHSI = LHS->getValueInit(Field.Name);
+    Init *RHSI = RHS->getValueInit(Field.Name);
+
+    if (isa<BitsRecTy>(Field.RecType) || isa<IntRecTy>(Field.RecType)) {
+      int64_t LHSi = getAsInt(LHSI);
+      int64_t RHSi = getAsInt(RHSI);
+      if (LHSi < RHSi)
+        return true;
+      if (LHSi > RHSi)
+        return false;
+    } else if (Field.IsIntrinsic) { // Order by (TargetPrefix, Name).
+      CodeGenIntrinsic &LHSi = getIntrinsic(LHSI);
+      CodeGenIntrinsic &RHSi = getIntrinsic(RHSI);
+      if (std::tie(LHSi.TargetPrefix, LHSi.Name) <
+          std::tie(RHSi.TargetPrefix, RHSi.Name))
+        return true;
+      if (std::tie(LHSi.TargetPrefix, LHSi.Name) >
+          std::tie(RHSi.TargetPrefix, RHSi.Name))
+        return false;
+    } else if (Field.IsInstruction) { // Pseudos first, then by def name.
+      // This does not correctly compare the predefined instructions!
+      Record *LHSr = cast<DefInit>(LHSI)->getDef();
+      Record *RHSr = cast<DefInit>(RHSI)->getDef();
+
+      bool LHSpseudo = LHSr->getValueAsBit("isPseudo");
+      bool RHSpseudo = RHSr->getValueAsBit("isPseudo");
+      if (LHSpseudo && !RHSpseudo)
+        return true;
+      if (!LHSpseudo && RHSpseudo)
+        return false;
+
+      int comp = LHSr->getName().compare(RHSr->getName());
+      if (comp < 0)
+        return true;
+      if (comp > 0)
+        return false;
+    } else if (Field.Enum) { // Order by the enum entry's numeric value.
+      auto LHSr = cast<DefInit>(LHSI)->getDef();
+      auto RHSr = cast<DefInit>(RHSI)->getDef();
+      int64_t LHSv = Field.Enum->EntryMap[LHSr]->second;
+      int64_t RHSv = Field.Enum->EntryMap[RHSr]->second;
+      if (LHSv < RHSv)
+        return true;
+      if (LHSv > RHSv)
+        return false;
+    } else { // Anything else: compare the emitted text.
+      std::string LHSs = primaryRepresentation(Field, LHSI);
+      std::string RHSs = primaryRepresentation(Field, RHSI);
+
+      if (isa<StringRecTy>(Field.RecType)) { // Strings compare upper-cased.
+        LHSs = StringRef(LHSs).upper();
+        RHSs = StringRef(RHSs).upper();
+      }
+
+      int comp = LHSs.compare(RHSs);
+      if (comp < 0)
+        return true;
+      if (comp > 0)
+        return false;
     }
-    OS << "},\n";
   }
-  OS << "};\n\n";
+  return false; // All key fields compared equal.
 }
 
-void SearchableTableEmitter::emitSearchTable(
-    StringRef Name, StringRef Field, std::vector<SearchTableEntry> &SearchTable,
-    raw_ostream &OS) {
-  OS << "const std::pair<" << searchableFieldType(SearchTable[0].first)
-     << ", int> " << Name << "sBy" << Field << "[] = {\n";
-
-  if (isa<BitsInit>(SearchTable[0].first)) {
-    std::stable_sort(SearchTable.begin(), SearchTable.end(),
-                     [this](const SearchTableEntry &LHS,
-                            const SearchTableEntry &RHS) {
-                       return getAsInt(cast<BitsInit>(LHS.first)) <
-                              getAsInt(cast<BitsInit>(RHS.first));
-                     });
+void SearchableTableEmitter::emitIfdef(StringRef Guard, raw_ostream &OS) {
+  PreprocessorGuards.insert(Guard.str()); // Record the guard that was opened.
+  OS << "#ifdef " << Guard << "\n";
+}
+
+/// Write \p Enum as a C++ enum wrapped in its GET_<guard>_DECL #ifdef block.
+void SearchableTableEmitter::emitGenericEnum(const GenericEnum &Enum,
+                                             raw_ostream &OS) {
+  const std::string Guard = "GET_" + Enum.PreprocessorGuard + "_DECL";
+  emitIfdef(Guard, OS);
+
+  OS << "enum " << Enum.Name << " {\n";
+  for (const std::unique_ptr<GenericEnum::Entry> &E : Enum.Entries)
+    OS << "  " << E->first << " = " << E->second << ",\n";
+  OS << "};\n"
+     << "#endif\n\n";
+}
+
+void SearchableTableEmitter::emitLookupFunction(const GenericTable &Table,
+                                                const SearchIndex &Index,
+                                                bool IsPrimary,
+                                                raw_ostream &OS) {
+  OS << "\n"; // Start of the emitted definition of one lookup function.
+  emitLookupDeclaration(Table, Index, OS);
+  OS << " {\n";
+
+  std::vector<Record *> IndexRowsStorage;
+  ArrayRef<Record *> IndexRows;
+  StringRef IndexTypeName;
+  StringRef IndexName;
+
+  if (IsPrimary) { // The primary key searches the data table itself.
+    IndexTypeName = Table.CppTypeName;
+    IndexName = Table.Name;
+    IndexRows = Table.Entries;
   } else {
-    std::stable_sort(SearchTable.begin(), SearchTable.end(),
-                     [this](const SearchTableEntry &LHS,
-                            const SearchTableEntry &RHS) {
-                       return searchRepresentation(LHS.first) <
-                              searchRepresentation(RHS.first);
+    OS << "  struct IndexType {\n";
+    for (const auto &Field : Index.Fields) {
+      OS << "    " << searchableFieldType(Field, TypeInStaticStruct) << " "
+         << Field.Name << ";\n";
+    }
+    OS << "    unsigned _index;\n"; // Row number in the primary data table.
+    OS << "  };\n";
+
+    OS << "  static const struct IndexType Index[] = {\n";
+
+    std::vector<std::pair<Record *, unsigned>> Entries;
+    Entries.reserve(Table.Entries.size());
+    for (unsigned i = 0; i < Table.Entries.size(); ++i)
+      Entries.emplace_back(Table.Entries[i], i);
+
+    std::stable_sort(Entries.begin(), Entries.end(),
+                     [&](const std::pair<Record *, unsigned> &LHS,
+                         const std::pair<Record *, unsigned> &RHS) {
+                       return compareBy(LHS.first, RHS.first, Index);
                      });
+
+    IndexRowsStorage.reserve(Entries.size());
+    for (const auto &Entry : Entries) {
+      IndexRowsStorage.push_back(Entry.first);
+
+      OS << "    { ";
+      bool NeedComma = false;
+      for (const auto &Field : Index.Fields) {
+        if (NeedComma)
+          OS << ", ";
+        NeedComma = true;
+
+        std::string Repr =
+            primaryRepresentation(Field, Entry.first->getValueInit(Field.Name));
+        if (isa<StringRecTy>(Field.RecType))
+          Repr = StringRef(Repr).upper();
+        OS << Repr;
+      }
+      OS << ", " << Entry.second << " },\n";
+    }
+
+    OS << "  };\n\n";
+
+    IndexTypeName = "IndexType";
+    IndexName = "Index";
+    IndexRows = IndexRowsStorage;
+  }
+
+  bool IsContiguous = false; // True when row i has numeric key i for all i.
+
+  if (Index.Fields.size() == 1 &&
+      (Index.Fields[0].Enum || isa<BitsRecTy>(Index.Fields[0].RecType))) {
+    IsContiguous = true;
+    for (unsigned i = 0; i < IndexRows.size(); ++i) {
+      if (getNumericKey(Index, IndexRows[i]) != i) {
+        IsContiguous = false;
+        break;
+      }
+    }
   }
 
-  for (auto Entry : SearchTable) {
-    OS << "  { " << searchRepresentation(Entry.first) << ", " << Entry.second
-       << " },\n";
+  if (IsContiguous) { // Emit a direct array index instead of a search.
+    OS << "  auto Table = makeArrayRef(" << IndexName << ");\n";
+    OS << "  size_t Idx = " << Index.Fields[0].Name << ";\n";
+    OS << "  return Idx >= Table.size() ? nullptr : ";
+    if (IsPrimary)
+      OS << "&Table[Idx]";
+    else
+      OS << "&" << Table.Name << "[Table[Idx]._index]";
+    OS << ";\n";
+    OS << "}\n";
+    return;
   }
-  OS << "};\n\n";
-}
 
-void SearchableTableEmitter::emitLookupFunction(StringRef Name, StringRef Field,
-                                                Init *I, raw_ostream &OS) {
-  bool IsIntegral = isa<BitsInit>(I);
-  std::string FieldType = searchableFieldType(I);
-  std::string PairType = "std::pair<" + FieldType + ", int>";
-
-  // const SysRegs *lookupSysRegByName(const char *Name) {
-  OS << "const " << Name << " *"
-     << "lookup" << Name << "By" << Field;
-  OS << "(" << (IsIntegral ? FieldType : "StringRef") << " " << Field
-     << ") {\n";
-
-  if (IsIntegral) {
-    OS << "  auto CanonicalVal = " << Field << ";\n";
-    OS << " " << PairType << " Val = {CanonicalVal, 0};\n";
-  } else {
-    // Make sure the result is null terminated because it's going via "char *".
-    OS << "  std::string CanonicalVal = " << Field << ".upper();\n";
-    OS << "  " << PairType << " Val = {CanonicalVal.c_str(), 0};\n";
+  if (Index.EarlyOut) { // Emit a range check on the first key field.
+    const GenericField &Field = Index.Fields[0];
+    std::string FirstRepr =
+        primaryRepresentation(Field, IndexRows[0]->getValueInit(Field.Name));
+    std::string LastRepr = primaryRepresentation(
+        Field, IndexRows.back()->getValueInit(Field.Name));
+    OS << "  if ((" << Field.Name << " < " << FirstRepr << ") ||\n";
+    OS << "      (" << Field.Name << " > " << LastRepr << "))\n";
+    OS << "    return nullptr;\n\n";
   }
 
-  OS << "  ArrayRef<" << PairType << "> Table(" << Name << "sBy" << Field
-     << ");\n";
-  OS << "  auto Idx = std::lower_bound(Table.begin(), Table.end(), Val";
-
-  if (IsIntegral)
-    OS << ");\n";
-  else {
-    OS << ",\n                              ";
-    OS << "[](const " << PairType << " &LHS, const " << PairType
-       << " &RHS) {\n";
-    OS << "    return std::strcmp(LHS.first, RHS.first) < 0;\n";
-    OS << "  });\n\n";
+  OS << "  struct KeyType {\n";
+  for (const auto &Field : Index.Fields) {
+    OS << "    " << searchableFieldType(Field, TypeInTempStruct) << " "
+       << Field.Name << ";\n";
+  }
+  OS << "  };\n";
+  OS << "  KeyType Key = { ";
+  bool NeedComma = false;
+  for (const auto &Field : Index.Fields) {
+    if (NeedComma)
+      OS << ", ";
+    NeedComma = true;
+
+    OS << Field.Name;
+    if (isa<StringRecTy>(Field.RecType)) {
+      OS << ".upper()"; // String keys are upper-cased before the search.
+      if (IsPrimary)
+        PrintFatalError(Twine("Use a secondary index for case-insensitive "
+                              "comparison of field '") +
+                        Field.Name + "' in table '" + Table.Name + "'");
+    }
+  }
+  OS << " };\n";
+
+  OS << "  auto Table = makeArrayRef(" << IndexName << ");\n";
+  OS << "  auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,\n";
+  OS << "    [](const " << IndexTypeName << " &LHS, const KeyType &RHS) {\n";
+
+  for (const auto &Field : Index.Fields) {
+    if (isa<StringRecTy>(Field.RecType)) {
+      OS << "      int Cmp" << Field.Name << " = StringRef(LHS." << Field.Name
+         << ").compare(RHS." << Field.Name << ");\n";
+      OS << "      if (Cmp" << Field.Name << " < 0) return true;\n";
+      OS << "      if (Cmp" << Field.Name << " > 0) return false;\n";
+    } else {
+      OS << "      if (LHS." << Field.Name << " < RHS." << Field.Name << ")\n";
+      OS << "        return true;\n";
+      OS << "      if (LHS." << Field.Name << " > RHS." << Field.Name << ")\n";
+      OS << "        return false;\n";
+    }
   }
 
-  OS << "  if (Idx == Table.end() || CanonicalVal != Idx->first)\n";
-  OS << "    return nullptr;\n";
+  OS << "      return false;\n";
+  OS << "    });\n\n";
 
-  OS << "  return &" << Name << "sList[Idx->second];\n";
-  OS << "}\n\n";
+  OS << "  if (Idx == Table.end()"; // Emit the exact-match check.
+
+  for (const auto &Field : Index.Fields)
+    OS << " ||\n      Key." << Field.Name << " != Idx->" << Field.Name;
+  OS << ")\n    return nullptr;\n";
+
+  if (IsPrimary)
+    OS << "  return &*Idx;\n";
+  else
+    OS << "  return &" << Table.Name << "[Idx->_index];\n";
+
+  OS << "}\n";
 }
 
-void SearchableTableEmitter::emitLookupDeclaration(StringRef Name,
-                                                   StringRef Field, Init *I,
+void SearchableTableEmitter::emitLookupDeclaration(const GenericTable &Table,
+                                                   const SearchIndex &Index,
                                                    raw_ostream &OS) {
-  bool IsIntegral = isa<BitsInit>(I);
-  std::string FieldType = searchableFieldType(I);
-  OS << "const " << Name << " *"
-     << "lookup" << Name << "By" << Field;
-  OS << "(" << (IsIntegral ? FieldType : "StringRef") << " " << Field
-     << ");\n\n";
+  OS << "const " << Table.CppTypeName << " *" << Index.Name << "(";
+
+  bool NeedComma = false; // Separator goes before every parameter but the first.
+  for (const auto &Field : Index.Fields) {
+    if (NeedComma)
+      OS << ", ";
+    NeedComma = true;
+
+    OS << searchableFieldType(Field, TypeInArgument) << " " << Field.Name;
+  }
+  OS << ")"; // No ';' or body here: the caller appends whichever it needs.
 }
 
-void SearchableTableEmitter::emitMapping(Record *InstanceClass,
-                                         raw_ostream &OS) {
-  StringRef TableName = InstanceClass->getName();
-  std::vector<Record *> Items = Records.getAllDerivedDefinitions(TableName);
+void SearchableTableEmitter::emitGenericTable(const GenericTable &Table,
+                                              raw_ostream &OS) {
+  emitIfdef((Twine("GET_") + Table.PreprocessorGuard + "_DECL").str(), OS);
 
-  // Gather all the records we're going to need for this particular mapping.
-  std::vector<std::vector<SearchTableEntry>> SearchTables;
-  std::vector<std::string> SearchFieldNames;
+  // Emit the declarations for the functions that will perform lookup.
+  if (Table.PrimaryKey) {
+    emitLookupDeclaration(Table, *Table.PrimaryKey, OS);
+    OS << ";\n";
+  }
+  for (const auto &Index : Table.Indices) {
+    emitLookupDeclaration(Table, *Index, OS);
+    OS << ";\n";
+  }
 
-  std::vector<std::string> FieldNames;
-  for (const RecordVal &Field : InstanceClass->getValues()) {
-    std::string FieldName = Field.getName();
+  OS << "#endif\n\n"; // Closes the _DECL section.
 
-    // Skip uninteresting fields: either built-in, special to us, or injected
-    // template parameters (if they contain a ':').
-    if (FieldName.find(':') != std::string::npos || FieldName == "NAME" ||
-        FieldName == "SearchableFields" || FieldName == "EnumNameField" ||
-        FieldName == "EnumValueField")
-      continue;
+  emitIfdef((Twine("GET_") + Table.PreprocessorGuard + "_IMPL").str(), OS);
 
-    FieldNames.push_back(FieldName);
-  }
+  // The primary data table contains all the fields defined for this map.
+  OS << "const " << Table.CppTypeName << " " << Table.Name << "[] = {\n";
+  for (unsigned i = 0; i < Table.Entries.size(); ++i) {
+    Record *Entry = Table.Entries[i];
+    OS << "  { ";
+
+    bool NeedComma = false;
+    for (const auto &Field : Table.Fields) {
+      if (NeedComma)
+        OS << ", ";
+      NeedComma = true;
+
+      OS << primaryRepresentation(Field, Entry->getValueInit(Field.Name));
+    }
 
-  for (auto *Field : *InstanceClass->getValueAsListInit("SearchableFields")) {
-    SearchTables.emplace_back();
-    SearchFieldNames.push_back(Field->getAsUnquotedString());
+    OS << " }, // " << i << "\n"; // Row number goes out as a C++ comment.
   }
+  OS << " };\n";
+
+  // Indexes are sorted "{ Thing, PrimaryIdx }" arrays, so that a binary
+  // search can be performed by "Thing".
+  if (Table.PrimaryKey)
+    emitLookupFunction(Table, *Table.PrimaryKey, true, OS);
+  for (const auto &Index : Table.Indices)
+    emitLookupFunction(Table, *Index, false, OS);
 
-  int Idx = 0;
-  for (Record *Item : Items) {
-    for (unsigned i = 0; i < SearchFieldNames.size(); ++i) {
-      Init *SearchVal = Item->getValueInit(SearchFieldNames[i]);
-      SearchTables[i].emplace_back(SearchVal, Idx);
+  OS << "#endif\n\n"; // Closes the _IMPL section.
+}
+
+bool SearchableTableEmitter::parseFieldType(GenericField &Field, Init *II) {
+  if (auto DI = dyn_cast<DefInit>(II)) { // Only a def reference is handled.
+    Record *TypeRec = DI->getDef();
+    if (TypeRec->isSubClassOf("GenericEnum")) {
+      Field.Enum = EnumMap[TypeRec]; // Looked up by the enum's defining record.
+      Field.RecType = RecordRecTy::get(Field.Enum->Class);
+      return true;
     }
-    ++Idx;
   }
 
-  OS << "#ifdef GET_" << TableName.upper() << "_DECL\n";
-  OS << "#undef GET_" << TableName.upper() << "_DECL\n";
+  return false; // Any other initializer is not a recognized field type.
+}
+
+std::unique_ptr<SearchIndex>
+SearchableTableEmitter::parseSearchIndex(GenericTable &Table, StringRef Name,
+                                         const std::vector<StringRef> &Key,
+                                         bool EarlyOut) {
+  auto Index = llvm::make_unique<SearchIndex>();
+  Index->Name = Name;
+  Index->EarlyOut = EarlyOut;
+  // Resolve every key field by name; an unknown name is a fatal error.
+  for (const auto &FieldName : Key) {
+    const GenericField *Field = Table.getFieldByName(FieldName);
+    if (!Field)
+      PrintFatalError(Twine("Search index '") + Name +
+                      "' refers to non-existing field '" + FieldName +
+                      "' in table '" + Table.Name + "'");
+    Index->Fields.push_back(*Field);
+  }
 
-  // Next emit the enum containing the top-level names for use in C++ code if
-  // requested
-  if (!InstanceClass->isValueUnset("EnumNameField")) {
-    emitMappingEnum(Items, InstanceClass, OS);
+  if (EarlyOut && !Index->Fields.empty() && // Never index an empty key list.
+      isa<StringRecTy>(Index->Fields[0].RecType)) {
+    PrintFatalError("Early-out is not supported for string types (in search "
+                    "index '" + Twine(Name) + "')");
   }
 
-  // And the declarations for the functions that will perform lookup.
-  for (unsigned i = 0; i < SearchFieldNames.size(); ++i)
-    emitLookupDeclaration(TableName, SearchFieldNames[i],
-                          SearchTables[i][0].first, OS);
+  return Index;
+}
 
-  OS << "#endif\n\n";
+void SearchableTableEmitter::collectEnumEntries(
+    GenericEnum &Enum, StringRef NameField, StringRef ValueField,
+    const std::vector<Record *> &Items) {
+  for (auto EntryRec : Items) {
+    StringRef Name;
+    if (NameField.empty())
+      Name = EntryRec->getName();
+    else
+      Name = EntryRec->getValueAsString(NameField);
+    // Without a ValueField the value stays 0 here and is assigned below.
+    int64_t Value = 0;
+    if (!ValueField.empty())
+      Value = getInt(EntryRec, ValueField);
+    // Entries owns each entry; EntryMap maps its record to a raw pointer.
+    Enum.Entries.push_back(llvm::make_unique<GenericEnum::Entry>(Name, Value));
+    Enum.EntryMap.insert(std::make_pair(EntryRec, Enum.Entries.back().get()));
+  }
 
-  OS << "#ifdef GET_" << TableName.upper() << "_IMPL\n";
-  OS << "#undef GET_" << TableName.upper() << "_IMPL\n";
+  if (ValueField.empty()) { // No explicit values: number the sorted entries.
+    std::stable_sort(Enum.Entries.begin(), Enum.Entries.end(),
+                     [](const std::unique_ptr<GenericEnum::Entry> &LHS,
+                        const std::unique_ptr<GenericEnum::Entry> &RHS) {
+                       return LHS->first < RHS->first; // presumably the name
+                     });
 
-  // The primary data table contains all the fields defined for this map.
-  emitPrimaryTable(TableName, FieldNames, SearchFieldNames, SearchTables, Items,
-                   OS);
+    for (size_t i = 0; i < Enum.Entries.size(); ++i)
+      Enum.Entries[i]->second = i; // Value becomes the sorted position.
+  }
+}
 
-  // Indexes are sorted "{ Thing, PrimaryIdx }" arrays, so that a binary
-  // search can be performed by "Thing".
-  for (unsigned i = 0; i < SearchTables.size(); ++i) {
-    emitSearchTable(TableName, SearchFieldNames[i], SearchTables[i], OS);
-    emitLookupFunction(TableName, SearchFieldNames[i], SearchTables[i][0].first,
-                       OS);
+void SearchableTableEmitter::collectTableEntries(
+    GenericTable &Table, const std::vector<Record *> &Items) {
+  for (auto EntryRec : Items) {
+    for (auto &Field : Table.Fields) {
+      auto TI = dyn_cast<TypedInit>(EntryRec->getValueInit(Field.Name));
+      if (!TI)
+        PrintFatalError(Twine("Record '") + EntryRec->getName() +
+                        "' in table '" + Table.Name + "' is missing field '" +
+                        Field.Name + "'");
+      // An unset field type is taken from the entry; otherwise both must unify.
+      if (!Field.RecType) {
+        Field.RecType = TI->getType();
+      } else {
+        RecTy *Ty = resolveTypes(Field.RecType, TI->getType());
+        if (!Ty) // Ty is null here, so report the field's current type instead.
+          PrintFatalError(Twine("Field '") + Field.Name + "' of table '" +
+                          Table.Name + "' has incompatible type: " +
+                          Field.RecType->getAsString() + " vs. " +
+                          TI->getType()->getAsString());
+        Field.RecType = Ty;
+      }
+    }
+
+    Table.Entries.push_back(EntryRec);
   }
 
-  OS << "#endif\n";
+  Record *IntrinsicClass = Records.getClass("Intrinsic");
+  Record *InstructionClass = Records.getClass("Instruction");
+  for (auto &Field : Table.Fields) { // Flag intrinsic/instruction fields.
+    if (auto RecordTy = dyn_cast<RecordRecTy>(Field.RecType)) {
+      if (IntrinsicClass && RecordTy->isSubClassOf(IntrinsicClass))
+        Field.IsIntrinsic = true;
+      else if (InstructionClass && RecordTy->isSubClassOf(InstructionClass))
+        Field.IsInstruction = true;
+    }
+  }
 }
 
 void SearchableTableEmitter::run(raw_ostream &OS) {
-  // Tables are defined to be the direct descendents of "SearchableEntry".
+  // Emit tables in a deterministic order to avoid needless rebuilds.
+  SmallVector<std::unique_ptr<GenericTable>, 4> Tables;
+  DenseMap<Record *, GenericTable *> TableMap; // Non-owning; Tables owns.
+
+  // Collect all definitions first.
+  for (auto EnumRec : Records.getAllDerivedDefinitions("GenericEnum")) {
+    StringRef NameField;
+    if (!EnumRec->isValueUnset("NameField"))
+      NameField = EnumRec->getValueAsString("NameField");
+
+    StringRef ValueField;
+    if (!EnumRec->isValueUnset("ValueField"))
+      ValueField = EnumRec->getValueAsString("ValueField");
+
+    auto Enum = llvm::make_unique<GenericEnum>();
+    Enum->Name = EnumRec->getName();
+    Enum->PreprocessorGuard = EnumRec->getName();
+
+    StringRef FilterClass = EnumRec->getValueAsString("FilterClass");
+    Enum->Class = Records.getClass(FilterClass);
+    if (!Enum->Class)
+      PrintFatalError(Twine("Enum FilterClass '") + FilterClass +
+                      "' does not exist");
+
+    collectEnumEntries(*Enum, NameField, ValueField,
+                       Records.getAllDerivedDefinitions(FilterClass));
+    EnumMap.insert(std::make_pair(EnumRec, Enum.get()));
+    Enums.emplace_back(std::move(Enum));
+  }
+
+  for (auto TableRec : Records.getAllDerivedDefinitions("GenericTable")) {
+    auto Table = llvm::make_unique<GenericTable>();
+    Table->Name = TableRec->getName();
+    Table->PreprocessorGuard = TableRec->getName();
+    Table->CppTypeName = TableRec->getValueAsString("CppTypeName");
+
+    std::vector<StringRef> Fields = TableRec->getValueAsListOfStrings("Fields");
+    for (const auto &FieldName : Fields) {
+      Table->Fields.emplace_back(FieldName);
+
+      if (auto TypeOfVal = TableRec->getValue(("TypeOf_" + FieldName).str())) {
+        if (!parseFieldType(Table->Fields.back(), TypeOfVal->getValue())) {
+          PrintFatalError(Twine("Table '") + Table->Name +
+                          "' has bad 'TypeOf_" + FieldName + "': " +
+                          TypeOfVal->getValue()->getAsString());
+        }
+      }
+    }
+
+    collectTableEntries(*Table, Records.getAllDerivedDefinitions(
+                                    TableRec->getValueAsString("FilterClass")));
+
+    if (!TableRec->isValueUnset("PrimaryKey")) {
+      Table->PrimaryKey =
+          parseSearchIndex(*Table, TableRec->getValueAsString("PrimaryKeyName"),
+                           TableRec->getValueAsListOfStrings("PrimaryKey"),
+                           TableRec->getValueAsBit("PrimaryKeyEarlyOut"));
+
+      std::stable_sort(Table->Entries.begin(), Table->Entries.end(),
+                       [&](Record *LHS, Record *RHS) {
+                         return compareBy(LHS, RHS, *Table->PrimaryKey);
+                       });
+    }
+
+    TableMap.insert(std::make_pair(TableRec, Table.get()));
+    Tables.emplace_back(std::move(Table));
+  }
+
+  for (Record *IndexRec : Records.getAllDerivedDefinitions("SearchIndex")) {
+    Record *TableRec = IndexRec->getValueAsDef("Table");
+    auto It = TableMap.find(TableRec);
+    if (It == TableMap.end())
+      PrintFatalError(Twine("SearchIndex '") + IndexRec->getName() + "' refers "
+                      "to non-existing table '" + TableRec->getName() + "'");
+
+    GenericTable &Table = *It->second;
+    Table.Indices.push_back(parseSearchIndex(
+        Table, IndexRec->getName(), IndexRec->getValueAsListOfStrings("Key"),
+        IndexRec->getValueAsBit("EarlyOut")));
+  }
+
+  // Translate legacy tables.
   Record *SearchableTable = Records.getClass("SearchableTable");
   for (auto &NameRec : Records.getClasses()) {
     Record *Class = NameRec.second.get();
     if (Class->getSuperClasses().size() != 1 ||
         !Class->isSubClassOf(SearchableTable))
       continue;
-    emitMapping(Class, OS);
+
+    StringRef TableName = Class->getName();
+    std::vector<Record *> Items = Records.getAllDerivedDefinitions(TableName);
+    if (!Class->isValueUnset("EnumNameField")) {
+      StringRef NameField = Class->getValueAsString("EnumNameField");
+      StringRef ValueField;
+      if (!Class->isValueUnset("EnumValueField"))
+        ValueField = Class->getValueAsString("EnumValueField");
+
+      auto Enum = llvm::make_unique<GenericEnum>();
+      Enum->Name = (Twine(Class->getName()) + "Values").str();
+      Enum->PreprocessorGuard = Class->getName().upper();
+      Enum->Class = Class;
+
+      collectEnumEntries(*Enum, NameField, ValueField, Items);
+
+      Enums.emplace_back(std::move(Enum));
+    }
+
+    auto Table = llvm::make_unique<GenericTable>();
+    Table->Name = (Twine(Class->getName()) + "sList").str();
+    Table->PreprocessorGuard = Class->getName().upper();
+    Table->CppTypeName = Class->getName();
+
+    for (const RecordVal &Field : Class->getValues()) {
+      std::string FieldName = Field.getName();
+
+      // Skip uninteresting fields: either special to us, or injected
+      // template parameters (if they contain a ':').
+      if (FieldName.find(':') != std::string::npos ||
+          FieldName == "SearchableFields" || FieldName == "EnumNameField" ||
+          FieldName == "EnumValueField")
+        continue;
+
+      Table->Fields.emplace_back(FieldName);
+    }
+
+    collectTableEntries(*Table, Items);
+
+    for (const auto &Field :
+         Class->getValueAsListOfStrings("SearchableFields")) {
+      std::string Name =
+          (Twine("lookup") + Table->CppTypeName + "By" + Field).str();
+      Table->Indices.push_back(parseSearchIndex(*Table, Name, {Field}, false));
+    }
+
+    Tables.emplace_back(std::move(Table));
   }
+
+  // Emit everything.
+  for (const auto &Enum : Enums)
+    emitGenericEnum(*Enum, OS);
+
+  for (const auto &Table : Tables)
+    emitGenericTable(*Table, OS);
+
+  // Put all #undefs last, to allow multiple sections guarded by the same
+  // define.
+  for (const auto &Guard : PreprocessorGuards)
+    OS << "#undef " << Guard << "\n";
 }
 
 namespace llvm {
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 2c5658f8ce75..c5da8d8142ff 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -13,6 +13,7 @@
 
 #include "CodeGenTarget.h"
 #include "CodeGenSchedule.h"
+#include "PredicateExpander.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
@@ -90,8 +91,14 @@ class SubtargetEmitter {
   void EmitItineraries(raw_ostream &OS,
                        std::vector<std::vector<InstrItinerary>>
                          &ProcItinLists);
+  unsigned EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
+                                  raw_ostream &OS);
+  void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
+                              raw_ostream &OS);
   void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
                          char Separator);
+  void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel,
+                                     raw_ostream &OS);
   void EmitProcessorResources(const CodeGenProcModel &ProcModel,
                               raw_ostream &OS);
   Record *FindWriteResources(const CodeGenSchedRW &SchedWrite,
@@ -106,6 +113,10 @@ class SubtargetEmitter {
   void EmitProcessorModels(raw_ostream &OS);
   void EmitProcessorLookup(raw_ostream &OS);
   void EmitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS);
+  void emitSchedModelHelpersImpl(raw_ostream &OS,
+                                 bool OnlyExpandMCInstPredicates = false);
+  void emitGenMCSubtargetInfo(raw_ostream &OS);
+
   void EmitSchedModel(raw_ostream &OS);
   void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
   void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures,
@@ -128,7 +139,7 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
   // Get all records of class and sort
   std::vector<Record*> DefList =
     Records.getAllDerivedDefinitions("SubtargetFeature");
-  std::sort(DefList.begin(), DefList.end(), LessRecord());
+  llvm::sort(DefList.begin(), DefList.end(), LessRecord());
 
   unsigned N = DefList.size();
   if (N == 0)
@@ -167,7 +178,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
   if (FeatureList.empty())
     return 0;
 
-  std::sort(FeatureList.begin(), FeatureList.end(), LessRecordFieldName());
+  llvm::sort(FeatureList.begin(), FeatureList.end(), LessRecordFieldName());
 
   // Begin feature table
   OS << "// Sorted (by key) array of values for CPU features.\n"
@@ -192,8 +203,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
        << "\"" << Desc << "\", "
        << "{ " << Target << "::" << Name << " }, ";
 
-    const std::vector<Record*> &ImpliesList =
-      Feature->getValueAsListOfDefs("Implies");
+    RecVec ImpliesList = Feature->getValueAsListOfDefs("Implies");
 
     OS << "{";
     for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
@@ -218,7 +228,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
   // Gather and sort processor information
   std::vector<Record*> ProcessorList =
                           Records.getAllDerivedDefinitions("Processor");
-  std::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+  llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
 
   // Begin processor table
   OS << "// Sorted (by key) array of values for CPU subtype.\n"
@@ -228,8 +238,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
   // For each processor
   for (Record *Processor : ProcessorList) {
     StringRef Name = Processor->getValueAsString("Name");
-    const std::vector<Record*> &FeatureList =
-      Processor->getValueAsListOfDefs("Features");
+    RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
 
     // Emit as { "cpu", "description", { f1 , f2 , ... fn } },
     OS << "  { "
@@ -261,8 +270,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
                                                 std::string &ItinString,
                                                 unsigned &NStages) {
   // Get states list
-  const std::vector<Record*> &StageList =
-    ItinData->getValueAsListOfDefs("Stages");
+  RecVec StageList = ItinData->getValueAsListOfDefs("Stages");
 
   // For each stage
   unsigned N = NStages = StageList.size();
@@ -275,7 +283,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
     ItinString += "  { " + itostr(Cycles) + ", ";
 
     // Get unit list
-    const std::vector<Record*> &UnitList = Stage->getValueAsListOfDefs("Units");
+    RecVec UnitList = Stage->getValueAsListOfDefs("Units");
 
     // For each unit
     for (unsigned j = 0, M = UnitList.size(); j < M;) {
@@ -304,7 +312,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
 void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData,
                          std::string &ItinString, unsigned &NOperandCycles) {
   // Get operand cycle list
-  const std::vector<int64_t> &OperandCycleList =
+  std::vector<int64_t> OperandCycleList =
     ItinData->getValueAsListOfInts("OperandCycles");
 
   // For each operand cycle
@@ -322,8 +330,7 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
                                                  Record *ItinData,
                                                  std::string &ItinString,
                                                  unsigned NOperandCycles) {
-  const std::vector<Record*> &BypassList =
-    ItinData->getValueAsListOfDefs("Bypasses");
+  RecVec BypassList = ItinData->getValueAsListOfDefs("Bypasses");
   unsigned N = BypassList.size();
   unsigned i = 0;
   for (; i < N;) {
@@ -354,7 +361,7 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
     if (!ItinsDefSet.insert(ProcModel.ItinsDef).second)
       continue;
 
-    std::vector<Record*> FUs = ProcModel.ItinsDef->getValueAsListOfDefs("FU");
+    RecVec FUs = ProcModel.ItinsDef->getValueAsListOfDefs("FU");
     if (FUs.empty())
       continue;
 
@@ -368,9 +375,9 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
 
     OS << "} // end namespace " << Name << "FU\n";
 
-    std::vector<Record*> BPs = ProcModel.ItinsDef->getValueAsListOfDefs("BP");
+    RecVec BPs = ProcModel.ItinsDef->getValueAsListOfDefs("BP");
     if (!BPs.empty()) {
-      OS << "\n// Pipeline forwarding pathes for itineraries \"" << Name
+      OS << "\n// Pipeline forwarding paths for itineraries \"" << Name
          << "\"\n" << "namespace " << Name << "Bypass {\n";
 
       OS << "  const unsigned NoBypass = 0;\n";
@@ -442,7 +449,7 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
       }
 
       // Check to see if stage already exists and create if it doesn't
-      unsigned FindStage = 0;
+      uint16_t FindStage = 0;
       if (NStages > 0) {
         FindStage = ItinStageMap[ItinStageString];
         if (FindStage == 0) {
@@ -458,7 +465,7 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
       }
 
       // Check to see if operand cycle already exists and create if it doesn't
-      unsigned FindOperandCycle = 0;
+      uint16_t FindOperandCycle = 0;
       if (NOperandCycles > 0) {
         std::string ItinOperandString = ItinOperandCycleString+ItinBypassString;
         FindOperandCycle = ItinOperandMap[ItinOperandString];
@@ -480,10 +487,14 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
       }
 
       // Set up itinerary as location and location + stage count
-      int NumUOps = ItinData ? ItinData->getValueAsInt("NumMicroOps") : 0;
-      InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
-                                    FindOperandCycle,
-                                    FindOperandCycle + NOperandCycles };
+      int16_t NumUOps = ItinData ? ItinData->getValueAsInt("NumMicroOps") : 0;
+      InstrItinerary Intinerary = {
+          NumUOps,
+          FindStage,
+          uint16_t(FindStage + NStages),
+          FindOperandCycle,
+          uint16_t(FindOperandCycle + NOperandCycles),
+      };
 
       // Inject - empty slots will be 0, 0
       ItinList[SchedClassIdx] = Intinerary;
@@ -559,7 +570,8 @@ EmitItineraries(raw_ostream &OS,
         ", // " << j << " " << SchedModels.getSchedClass(j).Name << "\n";
     }
     // End processor itinerary table
-    OS << "  { 0, ~0U, ~0U, ~0U, ~0U } // end marker\n";
+    OS << "  { 0, uint16_t(~0U), uint16_t(~0U), uint16_t(~0U), uint16_t(~0U) }"
+          "// end marker\n";
     OS << "};\n";
   }
 }
@@ -578,24 +590,216 @@ void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R,
   OS << '\n';
 }
 
+// Emit the flat table of sub-unit resource indices for each ProcResGroup.
+void SubtargetEmitter::EmitProcessorResourceSubUnits(
+    const CodeGenProcModel &ProcModel, raw_ostream &OS) {
+  OS << "\nstatic const unsigned " << ProcModel.ModelName
+     << "ProcResourceSubUnits[] = {\n  0,  // Invalid\n";
+  for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
+    Record *PRDef = ProcModel.ProcResourceDefs[i];
+    if (!PRDef->isSubClassOf("ProcResGroup"))
+      continue; // Only resource groups contribute sub-units.
+    RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
+    for (Record *RUDef : ResUnits) {
+      Record *const RU =
+          SchedModels.findProcResUnits(RUDef, ProcModel, PRDef->getLoc());
+      // Query the unit count once rather than on every loop iteration.
+      const int64_t NumUnits = RU->getValueAsInt("NumUnits");
+      for (int64_t J = 0; J < NumUnits; ++J)
+        OS << "  " << ProcModel.getProcResourceIdx(RU) << ", ";
+    }
+    OS << "  // " << PRDef->getName() << "\n";
+  }
+  OS << "};\n";
+}
+
+// Emit the retire control unit fields of the extra processor info record.
+static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel,
+                                      raw_ostream &OS) {
+  // Both values default to 0 and are never emitted as negative numbers.
+  int64_t BufSize = 0;
+  int64_t RetireMax = 0;
+  if (Record *Unit = ProcModel.RetireControlUnit) {
+    BufSize = std::max<int64_t>(0, Unit->getValueAsInt("ReorderBufferSize"));
+    RetireMax = std::max<int64_t>(0, Unit->getValueAsInt("MaxRetirePerCycle"));
+  }
+  OS << BufSize << ", // ReorderBufferSize\n  "
+     << RetireMax << ", // MaxRetirePerCycle\n  ";
+}
+
+// Emit register file info: each table pointer (or nullptr) and its size.
+static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel,
+                                 unsigned NumRegisterFiles,
+                                 unsigned NumCostEntries, raw_ostream &OS) {
+  if (NumRegisterFiles == 0)
+    OS << "nullptr,\n  0";
+  else // One more than the count; presumably for a leading invalid entry.
+    OS << ProcModel.ModelName << "RegisterFiles,\n  " << (NumRegisterFiles + 1);
+  OS << ", // Number of register files.\n  ";
+  if (NumCostEntries == 0)
+    OS << "nullptr,\n  ";
+  else
+    OS << ProcModel.ModelName << "RegisterCosts,\n  ";
+  OS << NumCostEntries << ", // Number of register cost entries.\n";
+}
+
+// Emit the register cost table and the register file descriptor table for
+// ProcModel.  Returns the total number of cost entries, or 0 (with nothing
+// emitted) when every register file reports default costs.
+unsigned
+SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
+                                         raw_ostream &OS) {
+  const auto UsesDefaultCosts = [](const CodeGenRegisterFile &File) {
+    return File.hasDefaultCosts();
+  };
+  if (llvm::all_of(ProcModel.RegisterFiles, UsesDefaultCosts))
+    return 0;
+
+  // The cost table comes first; the descriptor table below indexes into it.
+  OS << "\n// {RegisterClassID, Register Cost}\n"
+     << "static const llvm::MCRegisterCostEntry " << ProcModel.ModelName
+     << "RegisterCosts[] = {\n";
+  for (const CodeGenRegisterFile &File : ProcModel.RegisterFiles) {
+    if (File.hasDefaultCosts())
+      continue; // Default costs: contributes nothing to the cost table.
+    for (const CodeGenRegisterCost &Entry : File.Costs) {
+      Record *ClassDef = Entry.RCDef;
+      OS << "  { ";
+      // Qualify the register class ID when the record has a Namespace field.
+      if (ClassDef->getValue("Namespace"))
+        OS << ClassDef->getValueAsString("Namespace") << "::";
+      OS << ClassDef->getName() << "RegClassID, " << Entry.Cost << "},\n";
+    }
+  }
+  OS << "};\n";
+
+  // Descriptor table; entry 0 is a placeholder named InvalidRegisterFile.
+  OS << "\n // {Name, #PhysRegs, #CostEntries, IndexToCostTbl}\n"
+     << "static const llvm::MCRegisterFileDesc " << ProcModel.ModelName
+     << "RegisterFiles[] = {\n"
+     << "  { \"InvalidRegisterFile\", 0, 0, 0 },\n";
+
+  // Running index of each file's first entry within the cost table.
+  unsigned NextCostIdx = 0;
+  for (const CodeGenRegisterFile &File : ProcModel.RegisterFiles) {
+    const unsigned FileCostCount = File.Costs.size();
+    OS << "  { \"" << File.Name << "\", " << File.NumPhysRegs << ", "
+       << FileCostCount << ", " << NextCostIdx << "},\n";
+    NextCostIdx += FileCostCount;
+  }
+  OS << "};\n";
+
+  return NextCostIdx;
+}
+
+// Emit the <Model>PfmIssueCounters table, indexed by processor resource
+// index.  Returns false, emitting nothing, if the model has no issue counters.
+static bool EmitPfmIssueCountersTable(const CodeGenProcModel &ProcModel,
+                                      raw_ostream &OS) {
+  const unsigned NumSlots = 1 + ProcModel.ProcResourceDefs.size();
+  std::vector<const Record *> Slots(NumSlots);
+  bool AnyCounter = false;
+  for (const Record *Def : ProcModel.PfmIssueCounterDefs) {
+    Record *Resource = Def->getValueAsDef("Resource");
+    const Record *&Slot = Slots[ProcModel.getProcResourceIdx(Resource)];
+    if (Slot)
+      PrintFatalError(Def->getLoc(),
+                      "multiple issue counters for " + Resource->getName());
+    Slot = Def;
+    AnyCounter = true;
+  }
+  if (!AnyCounter)
+    return false;
+  OS << "\nstatic const char* " << ProcModel.ModelName
+     << "PfmIssueCounters[] = {\n";
+  for (unsigned Idx = 0; Idx != NumSlots; ++Idx) {
+    const Record *Def = Slots[Idx];
+    if (!Def) {
+      OS << "  nullptr, // #" << Idx << "\n";
+      continue;
+    }
+    const auto Names = Def->getValueAsListOfStrings("Counters");
+    if (Names.empty())
+      PrintFatalError(Def->getLoc(), "empty counter list");
+    // Names are written as adjacent string literals, each ending in a comma.
+    OS << "  \"" << Names.front();
+    for (unsigned N = 1, End = Names.size(); N != End; ++N)
+      OS << ",\" \"" << Names[N];
+    OS << "\",  // #" << Idx << " = "
+       << Def->getValueAsDef("Resource")->getName() << "\n";
+  }
+  OS << "};\n";
+  return true;
+}
+
+// Emit the braced performance counter initializer: the cycle counter name
+// followed by a reference to the issue counters table (nullptr when absent).
+static void EmitPfmCounters(const CodeGenProcModel &ProcModel,
+                            const bool HasPfmIssueCounters, raw_ostream &OS) {
+  OS << "  {\n";
+  if (const Record *CycleDef = ProcModel.PfmCycleCounterDef)
+    OS << "    \"" << CycleDef->getValueAsString("Counter")
+       << "\",  // Cycle counter.\n";
+  else
+    OS << "    nullptr,  // No cycle counter.\n";
+
+  if (!HasPfmIssueCounters)
+    OS << "    nullptr  // No issue counters.\n";
+  else
+    OS << "    " << ProcModel.ModelName << "PfmIssueCounters\n";
+  OS << "  }\n";
+}
+
+// Emit the MCExtraProcessorInfo record for ProcModel together with the
+// auxiliary tables it refers to.
+void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
+                                              raw_ostream &OS) {
+  // The auxiliary tables go first: register file descriptors and register
+  // costs, then the per-resource issue counter names.
+  const unsigned CostEntryCount = EmitRegisterFileTables(ProcModel, OS);
+  const bool EmittedIssueCounters = EmitPfmIssueCountersTable(ProcModel, OS);
+
+  // Open the record itself.
+  OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName
+     << "ExtraInfo = {\n  ";
+
+  // Retire control unit fields.
+  EmitRetireControlUnitInfo(ProcModel, OS);
+
+  // Register file fields: where to find the descriptors and the costs.
+  const unsigned RegisterFileCount = ProcModel.RegisterFiles.size();
+  EmitRegisterFileInfo(ProcModel, RegisterFileCount, CostEntryCount, OS);
+
+  // Performance counter fields.
+  EmitPfmCounters(ProcModel, EmittedIssueCounters, OS);
+
+  OS << "};\n";
+}
+
 void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
                                               raw_ostream &OS) {
-  OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered}\n";
-  OS << "static const llvm::MCProcResourceDesc "
-     << ProcModel.ModelName << "ProcResources" << "[] = {\n"
-     << "  {DBGFIELD(\"InvalidUnit\")     0, 0, 0},\n";
+  EmitProcessorResourceSubUnits(ProcModel, OS);
 
+  OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered, SubUnitsIdxBegin}\n";
+  OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName
+     << "ProcResources"
+     << "[] = {\n"
+     << "  {\"InvalidUnit\", 0, 0, 0, 0},\n";
+
+  unsigned SubUnitsOffset = 1;
   for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
     Record *PRDef = ProcModel.ProcResourceDefs[i];
 
     Record *SuperDef = nullptr;
     unsigned SuperIdx = 0;
     unsigned NumUnits = 0;
+    const unsigned SubUnitsBeginOffset = SubUnitsOffset;
     int BufferSize = PRDef->getValueAsInt("BufferSize");
     if (PRDef->isSubClassOf("ProcResGroup")) {
       RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
       for (Record *RU : ResUnits) {
         NumUnits += RU->getValueAsInt("NumUnits");
+        SubUnitsOffset += RU->getValueAsInt("NumUnits");
       }
     }
     else {
@@ -609,11 +813,17 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
       NumUnits = PRDef->getValueAsInt("NumUnits");
     }
     // Emit the ProcResourceDesc
-    OS << "  {DBGFIELD(\"" << PRDef->getName() << "\") ";
+    OS << "  {\"" << PRDef->getName() << "\", ";
     if (PRDef->getName().size() < 15)
       OS.indent(15 - PRDef->getName().size());
-    OS << NumUnits << ", " << SuperIdx << ", "
-       << BufferSize << "}, // #" << i+1;
+    OS << NumUnits << ", " << SuperIdx << ", " << BufferSize << ", ";
+    if (SubUnitsBeginOffset != SubUnitsOffset) {
+      OS << ProcModel.ModelName << "ProcResourceSubUnits + "
+         << SubUnitsBeginOffset;
+    } else {
+      OS << "nullptr";
+    }
+    OS << "}, // #" << i+1;
     if (SuperDef)
       OS << ", Super=" << SuperDef->getName();
     OS << "\n";
@@ -731,8 +941,7 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
 void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
                                            std::vector<int64_t> &Cycles,
                                            const CodeGenProcModel &PM) {
-  // Default to 1 resource cycle.
-  Cycles.resize(PRVec.size(), 1);
+  assert(PRVec.size() == Cycles.size() && "failed precondition");
   for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
     Record *PRDef = PRVec[i];
     RecVec SubResources;
@@ -783,9 +992,9 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
     return;
 
   std::vector<MCSchedClassDesc> &SCTab = SchedTables.ProcSchedClasses.back();
-  DEBUG(dbgs() << "\n+++ SCHED CLASSES (GenSchedClassTables) +++\n");
+  LLVM_DEBUG(dbgs() << "\n+++ SCHED CLASSES (GenSchedClassTables) +++\n");
   for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
-    DEBUG(SC.dump(&SchedModels));
+    LLVM_DEBUG(SC.dump(&SchedModels));
 
     SCTab.resize(SCTab.size() + 1);
     MCSchedClassDesc &SCDesc = SCTab.back();
@@ -823,7 +1032,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
     IdxVec Writes = SC.Writes;
     IdxVec Reads = SC.Reads;
     if (!SC.InstRWs.empty()) {
-      // This class has a default ReadWrite list which can be overriden by
+      // This class has a default ReadWrite list which can be overridden by
       // InstRW definitions.
       Record *RWDef = nullptr;
       for (Record *RW : SC.InstRWs) {
@@ -851,8 +1060,9 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
         }
       }
       if (Writes.empty()) {
-        DEBUG(dbgs() << ProcModel.ModelName
-              << " does not have resources for class " << SC.Name << '\n');
+        LLVM_DEBUG(dbgs() << ProcModel.ModelName
+                          << " does not have resources for class " << SC.Name
+                          << '\n');
       }
     }
     // Sum resources across all operand writes.
@@ -900,6 +1110,21 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
         std::vector<int64_t> Cycles =
           WriteRes->getValueAsListOfInts("ResourceCycles");
 
+        if (Cycles.empty()) {
+          // No ResourceCycles given: default to one cycle per resource.
+          Cycles.resize(PRVec.size(), 1);
+        } else if (Cycles.size() != PRVec.size()) {
+          // If ResourceCycles is provided, check consistency. Print the sizes
+          // in the order the message names them (ResourceCycles first).
+          PrintFatalError(
+              WriteRes->getLoc(),
+              Twine("Inconsistent resource cycles: !size(ResourceCycles) != "
+                    "!size(ProcResources): ")
+                  .concat(Twine(Cycles.size()))
+                  .concat(" vs ")
+                  .concat(Twine(PRVec.size())));
+        }
+
         ExpandProcResources(PRVec, Cycles, ProcModel);
 
         for (unsigned PRIdx = 0, PREnd = PRVec.size();
@@ -949,7 +1174,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
           WriteIDs.push_back(SchedModels.getSchedRWIdx(VW, /*IsRead=*/false));
         }
       }
-      std::sort(WriteIDs.begin(), WriteIDs.end());
+      llvm::sort(WriteIDs.begin(), WriteIDs.end());
       for(unsigned W : WriteIDs) {
         MCReadAdvanceEntry RAEntry;
         RAEntry.UseIdx = UseIdx;
@@ -967,8 +1192,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
     // compression.
     //
     // WritePrecRes entries are sorted by ProcResIdx.
-    std::sort(WriteProcResources.begin(), WriteProcResources.end(),
-              LessWriteProcResources());
+    llvm::sort(WriteProcResources.begin(), WriteProcResources.end(),
+               LessWriteProcResources());
 
     SCDesc.NumWriteProcResEntries = WriteProcResources.size();
     std::vector<MCWriteProcResEntry>::iterator WPRPos =
@@ -1119,6 +1344,9 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
 void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
   // For each processor model.
   for (const CodeGenProcModel &PM : SchedModels.procModels()) {
+    // Emit extra processor info if available.
+    if (PM.hasExtraProcessorInfo())
+      EmitExtraProcessorInfo(PM, OS);
     // Emit processor resource table.
     if (PM.hasInstrSchedModel())
       EmitProcessorResources(PM, OS);
@@ -1159,9 +1387,13 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
       OS << "  nullptr, nullptr, 0, 0,"
          << " // No instruction-level machine model.\n";
     if (PM.hasItineraries())
-      OS << "  " << PM.ItinsDef->getName() << "\n";
+      OS << "  " << PM.ItinsDef->getName() << ",\n";
+    else
+      OS << "  nullptr, // No Itinerary\n";
+    if (PM.hasExtraProcessorInfo())
+      OS << "  &" << PM.ModelName << "ExtraInfo,\n";
     else
-      OS << "  nullptr // No Itinerary\n";
+      OS << "  nullptr // No extra processor descriptor\n";
     OS << "};\n";
   }
 }
@@ -1173,7 +1405,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
   // Gather and sort processor information
   std::vector<Record*> ProcessorList =
                           Records.getAllDerivedDefinitions("Processor");
-  std::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
+  llvm::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());
 
   // Begin processor table
   OS << "\n";
@@ -1231,58 +1463,111 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
   OS << "\n#undef DBGFIELD";
 }
 
-void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
-                                             raw_ostream &OS) {
-  OS << "unsigned " << ClassName
-     << "\n::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI,"
-     << " const TargetSchedModel *SchedModel) const {\n";
+static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
+  std::string Buffer;
+  raw_string_ostream Stream(Buffer);
+
+  // Collect all the PredicateProlog records and print them to the output
+  // stream.
+  std::vector<Record *> Prologs =
+      Records.getAllDerivedDefinitions("PredicateProlog");
+  llvm::sort(Prologs.begin(), Prologs.end(), LessRecord());
+  for (Record *P : Prologs)
+    Stream << P->getValueAsString("Code") << '\n';
 
-  std::vector<Record*> Prologs = Records.getAllDerivedDefinitions("PredicateProlog");
-  std::sort(Prologs.begin(), Prologs.end(), LessRecord());
-  for (Record *P : Prologs) {
-    OS << P->getValueAsString("Code") << '\n';
+  Stream.flush();
+  OS << Buffer;
+}
+
+static void emitPredicates(const CodeGenSchedTransition &T,
+                           const CodeGenSchedClass &SC,
+                           PredicateExpander &PE,
+                           raw_ostream &OS) {
+  std::string Buffer;
+  raw_string_ostream StringStream(Buffer);
+  formatted_raw_ostream FOS(StringStream);
+
+  FOS.PadToColumn(6);
+  FOS << "if (";
+  for (RecIter RI = T.PredTerm.begin(), RE = T.PredTerm.end(); RI != RE; ++RI) {
+    if (RI != T.PredTerm.begin()) {
+      FOS << "\n";
+      FOS.PadToColumn(8);
+      FOS << "&& ";
+    }
+    const Record *Rec = *RI;
+    if (Rec->isSubClassOf("MCSchedPredicate"))
+      PE.expandPredicate(FOS, Rec->getValueAsDef("Pred"));
+    else
+      FOS << "(" << Rec->getValueAsString("Predicate") << ")";
   }
+
+  FOS << ")\n";
+  FOS.PadToColumn(8);
+  FOS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
+  FOS.flush();
+  OS << Buffer;
+}
+
+void SubtargetEmitter::emitSchedModelHelpersImpl(
+    raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
+  // Collect Variant Classes.
   IdxVec VariantClasses;
   for (const CodeGenSchedClass &SC : SchedModels.schedClasses()) {
     if (SC.Transitions.empty())
       continue;
     VariantClasses.push_back(SC.Index);
   }
+
   if (!VariantClasses.empty()) {
-    OS << "  switch (SchedClass) {\n";
+    bool FoundPredicates = false;
     for (unsigned VC : VariantClasses) {
+      // Emit code for each variant scheduling class.
       const CodeGenSchedClass &SC = SchedModels.getSchedClass(VC);
-      OS << "  case " << VC << ": // " << SC.Name << '\n';
       IdxVec ProcIndices;
       for (const CodeGenSchedTransition &T : SC.Transitions) {
+        if (OnlyExpandMCInstPredicates &&
+            !all_of(T.PredTerm, [](const Record *Rec) {
+              return Rec->isSubClassOf("MCSchedPredicate");
+            }))
+          continue;
+
         IdxVec PI;
         std::set_union(T.ProcIndices.begin(), T.ProcIndices.end(),
                        ProcIndices.begin(), ProcIndices.end(),
                        std::back_inserter(PI));
         ProcIndices.swap(PI);
       }
+      if (ProcIndices.empty())
+        continue;
+
+      // Emit a switch statement only if there are predicates to expand.
+      if (!FoundPredicates) {
+        OS << "  switch (SchedClass) {\n";
+        FoundPredicates = true;
+      }
+
+      OS << "  case " << VC << ": // " << SC.Name << '\n';
+      PredicateExpander PE;
+      PE.setByRef(false);
+      PE.setExpandForMC(OnlyExpandMCInstPredicates);
       for (unsigned PI : ProcIndices) {
         OS << "    ";
-        if (PI != 0)
-          OS << "if (SchedModel->getProcessorID() == " << PI << ") ";
-        OS << "{ // " << (SchedModels.procModelBegin() + PI)->ModelName
-           << '\n';
+        if (PI != 0) {
+          OS << (OnlyExpandMCInstPredicates
+                     ? "if (CPUID == "
+                     : "if (SchedModel->getProcessorID() == ");
+          OS << PI << ") ";
+        }
+        OS << "{ // " << (SchedModels.procModelBegin() + PI)->ModelName << '\n';
+
         for (const CodeGenSchedTransition &T : SC.Transitions) {
-          if (PI != 0 && !std::count(T.ProcIndices.begin(),
-                                     T.ProcIndices.end(), PI)) {
-              continue;
-          }
-          OS << "      if (";
-          for (RecIter RI = T.PredTerm.begin(), RE = T.PredTerm.end();
-               RI != RE; ++RI) {
-            if (RI != T.PredTerm.begin())
-              OS << "\n          && ";
-            OS << "(" << (*RI)->getValueAsString("Predicate") << ")";
-          }
-          OS << ")\n"
-             << "        return " << T.ToClassIdx << "; // "
-             << SchedModels.getSchedClass(T.ToClassIdx).Name << '\n';
+          if (PI != 0 && !count(T.ProcIndices, PI))
+            continue;
+          PE.setIndentLevel(4);
+          emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
         }
+
         OS << "    }\n";
         if (PI == 0)
           break;
@@ -1291,10 +1576,40 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
         OS << "    return " << SC.Index << ";\n";
       OS << "    break;\n";
     }
-    OS << "  };\n";
+
+    if (FoundPredicates)
+     OS << "  };\n";
   }
-  OS << "  report_fatal_error(\"Expected a variant SchedClass\");\n"
-     << "} // " << ClassName << "::resolveSchedClass\n";
+
+  if (OnlyExpandMCInstPredicates) {
+    OS << "  // Don't know how to resolve this scheduling class.\n"
+       << "  return 0;\n";
+    return;
+  }
+
+  OS << "  report_fatal_error(\"Expected a variant SchedClass\");\n";
+}
+
+void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
+                                             raw_ostream &OS) {
+  // <ClassName>::resolveSchedClass: signature, then the predicate prolog
+  // code, then the expanded target predicates.
+  OS << "unsigned " << ClassName
+     << "\n::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI,"
+     << " const TargetSchedModel *SchedModel) const {\n";
+
+  emitPredicateProlog(Records, OS);
+  emitSchedModelHelpersImpl(OS);
+  OS << "} // " << ClassName << "::resolveSchedClass\n\n";
+
+  // <ClassName>::resolveVariantSchedClass: a one-line body that forwards to
+  // <Target>_MC::resolveVariantSchedClassImpl.
+  OS << "unsigned " << ClassName
+     << "\n::resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI,"
+     << " unsigned CPUID) const {\n";
+  OS << "  return " << Target << "_MC"
+     << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n";
+  OS << "} // " << ClassName << "::resolveVariantSchedClass\n";
+}
 
 void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
@@ -1322,15 +1637,15 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
                                              unsigned NumProcs) {
   std::vector<Record*> Features =
                        Records.getAllDerivedDefinitions("SubtargetFeature");
-  std::sort(Features.begin(), Features.end(), LessRecord());
+  llvm::sort(Features.begin(), Features.end(), LessRecord());
 
   OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"
      << "// subtarget options.\n"
      << "void llvm::";
   OS << Target;
   OS << "Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {\n"
-     << "  DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"
-     << "  DEBUG(dbgs() << \"\\nCPU:\" << CPU << \"\\n\\n\");\n";
+     << "  LLVM_DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"
+     << "  LLVM_DEBUG(dbgs() << \"\\nCPU:\" << CPU << \"\\n\\n\");\n";
 
   if (Features.empty()) {
     OS << "}\n";
@@ -1360,6 +1675,34 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
   OS << "}\n";
 }
 
+void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
+  OS << "namespace " << Target << "_MC {\n"
+     << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,\n"
+     << "    const MCInst *MI, unsigned CPUID) {\n";
+  emitSchedModelHelpersImpl(OS, /*OnlyExpandMCInstPredicates=*/true);
+  OS << "}\n";
+  OS << "} // end of namespace " << Target << "_MC\n\n";
+  // MCSubtargetInfo subclass that forwards to the function emitted above.
+  OS << "struct " << Target
+     << "GenMCSubtargetInfo : public MCSubtargetInfo {\n";
+  OS << "  " << Target << "GenMCSubtargetInfo(const Triple &TT, \n"
+     << "    StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF,\n"
+     << "    ArrayRef<SubtargetFeatureKV> PD,\n"
+     << "    const SubtargetInfoKV *ProcSched,\n"
+     << "    const MCWriteProcResEntry *WPR,\n"
+     << "    const MCWriteLatencyEntry *WL,\n"
+     << "    const MCReadAdvanceEntry *RA, const InstrStage *IS,\n"
+     << "    const unsigned *OC, const unsigned *FP) :\n"
+     << "      MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n"
+     << "                      WPR, WL, RA, IS, OC, FP) { }\n\n"
+     << "  unsigned resolveVariantSchedClass(unsigned SchedClass,\n"
+     << "      const MCInst *MI, unsigned CPUID) const override {\n"
+     << "    return " << Target << "_MC"
+     << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID); \n";
+  OS << "  }\n";
+  OS << "};\n";
+}
+
 //
 // SubtargetEmitter::run - Main subtarget enumeration emitter.
 //
@@ -1392,10 +1735,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
 #endif
 
   // MCInstrInfo initialization routine.
+  emitGenMCSubtargetInfo(OS);
+
   OS << "\nstatic inline MCSubtargetInfo *create" << Target
      << "MCSubtargetInfoImpl("
      << "const Triple &TT, StringRef CPU, StringRef FS) {\n";
-  OS << "  return new MCSubtargetInfo(TT, CPU, FS, ";
+  OS << "  return new " << Target << "GenMCSubtargetInfo(TT, CPU, FS, ";
   if (NumFeatures)
     OS << Target << "FeatureKV, ";
   else
@@ -1438,6 +1783,10 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   std::string ClassName = Target + "GenSubtargetInfo";
   OS << "namespace llvm {\n";
   OS << "class DFAPacketizer;\n";
+  OS << "namespace " << Target << "_MC {\n"
+     << "unsigned resolveVariantSchedClassImpl(unsigned SchedClass,"
+     << " const MCInst *MI, unsigned CPUID);\n"
+     << "}\n\n";
   OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
      << "  explicit " << ClassName << "(const Triple &TT, StringRef CPU, "
      << "StringRef FS);\n"
@@ -1445,6 +1794,8 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << "  unsigned resolveSchedClass(unsigned SchedClass, "
      << " const MachineInstr *DefMI,"
      << " const TargetSchedModel *SchedModel) const override;\n"
+     << "  unsigned resolveVariantSchedClass(unsigned SchedClass,"
+     << " const MCInst *MI, unsigned CPUID) const override;\n"
      << "  DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
      << " const;\n";
   if (TGT.getHwModes().getNumModeIds() > 1)
diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp
index 5153c35b1261..f9b8853cc117 100644
--- a/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -10,6 +10,7 @@
 #include "SubtargetFeatureInfo.h"
 
 #include "Types.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/TableGen/Record.h"
 
 #include <map>
diff --git a/utils/TableGen/SubtargetFeatureInfo.h b/utils/TableGen/SubtargetFeatureInfo.h
index c55c16a4031e..71e6748c863f 100644
--- a/utils/TableGen/SubtargetFeatureInfo.h
+++ b/utils/TableGen/SubtargetFeatureInfo.h
@@ -27,20 +27,20 @@ using SubtargetFeatureInfoMap = std::map<Record *, SubtargetFeatureInfo, LessRec
 /// Helper class for storing information on a subtarget feature which
 /// participates in instruction matching.
 struct SubtargetFeatureInfo {
-  /// \brief The predicate record for this feature.
+  /// The predicate record for this feature.
   Record *TheDef;
 
-  /// \brief An unique index assigned to represent this feature.
+  /// A unique index assigned to represent this feature.
   uint64_t Index;
 
   SubtargetFeatureInfo(Record *D, uint64_t Idx) : TheDef(D), Index(Idx) {}
 
-  /// \brief The name of the enumerated constant identifying this feature.
+  /// The name of the enumerated constant identifying this feature.
   std::string getEnumName() const {
     return "Feature_" + TheDef->getName().str();
   }
 
-  /// \brief The name of the enumerated constant identifying the bitnumber for
+  /// The name of the enumerated constant identifying the bitnumber for
   /// this feature.
   std::string getEnumBitName() const {
     return "Feature_" + TheDef->getName().str() + "Bit";
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index b0e0385a45c7..b78260625cb2 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
 
 enum ActionType {
   PrintRecords,
+  DumpJSON,
   GenEmitter,
   GenRegisterInfo,
   GenInstrInfo,
@@ -32,13 +33,16 @@ enum ActionType {
   GenAsmMatcher,
   GenDisassembler,
   GenPseudoLowering,
+  GenCompressInst,
   GenCallingConv,
   GenDAGISel,
   GenDFAPacketizer,
   GenFastISel,
   GenSubtarget,
-  GenIntrinsic,
-  GenTgtIntrinsic,
+  GenIntrinsicEnums,
+  GenIntrinsicImpl,
+  GenTgtIntrinsicEnums,
+  GenTgtIntrinsicImpl,
   PrintEnums,
   PrintSets,
   GenOptParserDefs,
@@ -56,6 +60,8 @@ namespace {
   Action(cl::desc("Action to perform:"),
          cl::values(clEnumValN(PrintRecords, "print-records",
                                "Print all records to stdout (default)"),
+                    clEnumValN(DumpJSON, "dump-json",
+                               "Dump all records as machine-readable JSON"),
                     clEnumValN(GenEmitter, "gen-emitter",
                                "Generate machine code emitter"),
                     clEnumValN(GenRegisterInfo, "gen-register-info",
@@ -72,6 +78,8 @@ namespace {
                                "Generate disassembler"),
                     clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
                                "Generate pseudo instruction lowering"),
+                    clEnumValN(GenCompressInst, "gen-compress-inst-emitter",
+                               "Generate RISCV compressed instructions."),
                     clEnumValN(GenAsmMatcher, "gen-asm-matcher",
                                "Generate assembly instruction matcher"),
                     clEnumValN(GenDAGISel, "gen-dag-isel",
@@ -82,9 +90,13 @@ namespace {
                                "Generate a \"fast\" instruction selector"),
                     clEnumValN(GenSubtarget, "gen-subtarget",
                                "Generate subtarget enumerations"),
-                    clEnumValN(GenIntrinsic, "gen-intrinsic",
+                    clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums",
+                               "Generate intrinsic enums"),
+                    clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
                                "Generate intrinsic information"),
-                    clEnumValN(GenTgtIntrinsic, "gen-tgt-intrinsic",
+                    clEnumValN(GenTgtIntrinsicEnums, "gen-tgt-intrinsic-enums",
+                               "Generate target intrinsic enums"),
+                    clEnumValN(GenTgtIntrinsicImpl, "gen-tgt-intrinsic-impl",
                                "Generate target intrinsic information"),
                     clEnumValN(PrintEnums, "print-enums",
                                "Print enum values for a class"),
@@ -117,6 +129,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case PrintRecords:
     OS << Records;           // No argument, dump all contents
     break;
+  case DumpJSON:
+    EmitJSON(Records, OS);
+    break;
   case GenEmitter:
     EmitCodeEmitter(Records, OS);
     break;
@@ -144,6 +159,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenPseudoLowering:
     EmitPseudoLowering(Records, OS);
     break;
+  case GenCompressInst:
+    EmitCompressInst(Records, OS);
+    break;
   case GenDAGISel:
     EmitDAGISel(Records, OS);
     break;
@@ -156,11 +174,17 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenSubtarget:
     EmitSubtarget(Records, OS);
     break;
-  case GenIntrinsic:
-    EmitIntrinsics(Records, OS);
+  case GenIntrinsicEnums:
+    EmitIntrinsicEnums(Records, OS);
+    break;
+  case GenIntrinsicImpl:
+    EmitIntrinsicImpl(Records, OS);
+    break;
+  case GenTgtIntrinsicEnums:
+    EmitIntrinsicEnums(Records, OS, true);
     break;
-  case GenTgtIntrinsic:
-    EmitIntrinsics(Records, OS, true);
+  case GenTgtIntrinsicImpl:
+    EmitIntrinsicImpl(Records, OS, true);
     break;
   case GenOptParserDefs:
     EmitOptParser(Records, OS);
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 914cd5a1fc9b..1329a6d833f4 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -62,7 +62,10 @@ namespace llvm {
 class raw_ostream;
 class RecordKeeper;
 
-void EmitIntrinsics(RecordKeeper &RK, raw_ostream &OS, bool TargetOnly = false);
+void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS,
+                        bool TargetOnly = false);
+void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS,
+                       bool TargetOnly = false);
 void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS);
 void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS);
 void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS);
@@ -74,6 +77,7 @@ void EmitFastISel(RecordKeeper &RK, raw_ostream &OS);
 void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS);
 void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS);
 void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
+void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
 void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
 void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
new file mode 100644
index 000000000000..df63337d5637
--- /dev/null
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -0,0 +1,116 @@
+//===- WebAssemblyDisassemblerEmitter.cpp - Disassembler tables -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the WebAssembly Disassembler Emitter.
+// It contains the implementation of the disassembler tables.
+// Documentation for the disassembler emitter in general can be found in
+// WebAssemblyDisassemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyDisassemblerEmitter.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+/// Emit the WebAssembly disassembler tables to \p OS: one 256-entry
+/// InstructionTable<Prefix> array per opcode prefix byte seen in
+/// \p NumberedInstructions, followed by a PrefixTable listing every non-zero
+/// prefix together with its table.
+void emitWebAssemblyDisassemblerTables(
+    raw_ostream &OS,
+    const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) {
+  // First let's organize all opcodes by (prefix) byte. Prefix 0 is the
+  // starting table.
+  std::map<unsigned,
+           std::map<unsigned, std::pair<unsigned, const CodeGenInstruction *>>>
+      OpcodeTable;
+  for (unsigned I = 0; I != NumberedInstructions.size(); ++I) {
+    auto &CGI = *NumberedInstructions[I];
+    auto &Def = *CGI.TheDef;
+    if (!Def.getValue("Inst"))
+      continue;
+    auto &Inst = *Def.getValueAsBitsInit("Inst");
+    auto Opc = static_cast<unsigned>(
+        cast<IntInit>(Inst.convertInitializerTo(IntRecTy::get()))
+            ->getValue());
+    if (Opc == 0xFFFFFFFF)
+      continue; // No opcode defined.
+    assert(Opc <= 0xFFFF && "opcode does not fit in 16 bits");
+    auto Prefix = Opc >> 8;
+    Opc = Opc & 0xFF;
+    auto &CGIP = OpcodeTable[Prefix][Opc];
+    if (!CGIP.second ||
+        // Make sure we store the variant with the least amount of operands,
+        // which is the one without explicit registers. Only few instructions
+        // have these currently, would be good to have for all of them.
+        // FIXME: this picks the first of many typed variants, which is
+        // currently the except_ref one, though this shouldn't matter for
+        // disassembly purposes.
+        CGIP.second->Operands.OperandList.size() >
+            CGI.Operands.OperandList.size()) {
+      CGIP = std::make_pair(I, &CGI);
+    }
+  }
+  OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n\n";
+  OS << "namespace llvm {\n\n";
+  OS << "enum EntryType : uint8_t { ";
+  OS << "ET_Unused, ET_Prefix, ET_Instruction };\n\n";
+  OS << "struct WebAssemblyInstruction {\n";
+  OS << "  uint16_t Opcode;\n";
+  OS << "  EntryType ET;\n";
+  OS << "  uint8_t NumOperands;\n";
+  OS << "  uint8_t Operands[4];\n";
+  OS << "};\n\n";
+  // Output one table per prefix.
+  for (auto &PrefixPair : OpcodeTable) {
+    if (PrefixPair.second.empty())
+      continue;
+    OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first;
+    OS << "[] = {\n";
+    for (unsigned I = 0; I <= 0xFF; I++) {
+      auto InstIt = PrefixPair.second.find(I);
+      if (InstIt != PrefixPair.second.end()) {
+        // Regular instruction.
+        assert(InstIt->second.second);
+        auto &CGI = *InstIt->second.second;
+        OS << "  // 0x";
+        OS.write_hex(static_cast<unsigned long long>(I));
+        OS << ": " << CGI.AsmString << "\n";
+        OS << "  { " << InstIt->second.first << ", ET_Instruction, ";
+        OS << CGI.Operands.OperandList.size() << ", {\n";
+        for (auto &Op : CGI.Operands.OperandList)
+          OS << "      " << Op.OperandType << ",\n";
+        OS << "    }\n";
+      } else {
+        auto PrefixIt = OpcodeTable.find(I);
+        // If we have a non-empty table for it that's not 0, this is a prefix.
+        if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first)
+          OS << "  { 0, ET_Prefix, 0, {}";
+        else
+          OS << "  { 0, ET_Unused, 0, {}";
+      }
+      OS << "  },\n";
+    }
+    OS << "};\n\n";
+  }
+  // Create a table of all extension tables:
+  OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
+  OS << "PrefixTable[] = {\n";
+  for (auto &PrefixPair : OpcodeTable) {
+    if (PrefixPair.second.empty() || !PrefixPair.first)
+      continue;
+    OS << "  { " << PrefixPair.first << ", InstructionTable"
+       << PrefixPair.first << " },\n";
+  }
+  OS << "  { 0, nullptr }\n};\n\n";
+  OS << "} // End llvm namespace\n";
+}
+
+} // namespace llvm
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
new file mode 100644
index 000000000000..91f820f120a2
--- /dev/null
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -0,0 +1,30 @@
+//===- WebAssemblyDisassemblerEmitter.h - Disassembler tables ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the WebAssembly Disassembler Emitter.
+// It contains the interface of the disassembler tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
+#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
+
+#include "CodeGenInstruction.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+void emitWebAssemblyDisassemblerTables(
+    raw_ostream &OS,
+    const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
+
+} // namespace llvm
+
+#endif
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index e5889e92415d..220765f72410 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -13,7 +13,7 @@
 #include <cstring>
 #include <string>
 
-#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
 
 struct InstructionSpecifier {
   llvm::X86Disassembler::OperandSpecifier
@@ -49,6 +49,10 @@ struct OpcodeDecision {
 /// entries in this table, rather than 2^(ATTR_max).
 struct ContextDecision {
   OpcodeDecision opcodeDecisions[llvm::X86Disassembler::IC_max];
+
+  ContextDecision() {
+    memset(opcodeDecisions, 0, sizeof(opcodeDecisions));
+  }
 };
 
 #endif
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index fce41f7a2cc2..2b5cc1279605 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -112,6 +112,10 @@ static inline bool inheritsFrom(InstructionContext child,
     return inheritsFrom(child, IC_64BIT_XD_OPSIZE);
   case IC_XS_OPSIZE:
     return inheritsFrom(child, IC_64BIT_XS_OPSIZE);
+  case IC_XD_ADSIZE:
+    return inheritsFrom(child, IC_64BIT_XD_ADSIZE);
+  case IC_XS_ADSIZE:
+    return inheritsFrom(child, IC_64BIT_XS_ADSIZE);
   case IC_64BIT_REXW:
     return((noPrefix && inheritsFrom(child, IC_64BIT_REXW_XS, noPrefix)) ||
            (noPrefix && inheritsFrom(child, IC_64BIT_REXW_XD, noPrefix)) ||
@@ -122,12 +126,17 @@ static inline bool inheritsFrom(InstructionContext child,
            (!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) ||
            (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE));
   case IC_64BIT_XD:
-    return(inheritsFrom(child, IC_64BIT_REXW_XD));
+    return(inheritsFrom(child, IC_64BIT_REXW_XD) ||
+           (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
   case IC_64BIT_XS:
-    return(inheritsFrom(child, IC_64BIT_REXW_XS));
+    return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
+           (!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE)));
   case IC_64BIT_XD_OPSIZE:
   case IC_64BIT_XS_OPSIZE:
     return false;
+  case IC_64BIT_XD_ADSIZE:
+  case IC_64BIT_XS_ADSIZE:
+    return false;
   case IC_64BIT_REXW_XD:
   case IC_64BIT_REXW_XS:
   case IC_64BIT_REXW_OPSIZE:
@@ -642,21 +651,13 @@ static const char* stringForDecisionType(ModRMDecisionType dt) {
 }
 
 DisassemblerTables::DisassemblerTables() {
-  unsigned i;
-
-  for (i = 0; i < array_lengthof(Tables); i++) {
-    Tables[i] = new ContextDecision;
-    memset(Tables[i], 0, sizeof(ContextDecision));
-  }
+  for (unsigned i = 0; i < array_lengthof(Tables); i++)
+    Tables[i] = llvm::make_unique<ContextDecision>();
 
   HasConflicts = false;
 }
 
 DisassemblerTables::~DisassemblerTables() {
-  unsigned i;
-
-  for (i = 0; i < array_lengthof(Tables); i++)
-    delete Tables[i];
 }
 
 void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2,
@@ -961,8 +962,12 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
       o << "IC_64BIT_REXW_ADSIZE";
     else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE))
       o << "IC_64BIT_XD_OPSIZE";
+    else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_ADSIZE))
+      o << "IC_64BIT_XD_ADSIZE";
     else if ((index & ATTR_64BIT) && (index & ATTR_XS) && (index & ATTR_OPSIZE))
       o << "IC_64BIT_XS_OPSIZE";
+    else if ((index & ATTR_64BIT) && (index & ATTR_XS) && (index & ATTR_ADSIZE))
+      o << "IC_64BIT_XS_ADSIZE";
     else if ((index & ATTR_64BIT) && (index & ATTR_XS))
       o << "IC_64BIT_XS";
     else if ((index & ATTR_64BIT) && (index & ATTR_XD))
@@ -982,6 +987,10 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
       o << "IC_XS_OPSIZE";
     else if ((index & ATTR_XD) && (index & ATTR_OPSIZE))
       o << "IC_XD_OPSIZE";
+    else if ((index & ATTR_XS) && (index & ATTR_ADSIZE))
+      o << "IC_XS_ADSIZE";
+    else if ((index & ATTR_XD) && (index & ATTR_ADSIZE))
+      o << "IC_XD_ADSIZE";
     else if (index & ATTR_XS)
       o << "IC_XS";
     else if (index & ATTR_XD)
@@ -1019,6 +1028,7 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2,
   emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[4], XOP8_MAP_STR);
   emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[5], XOP9_MAP_STR);
   emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[6], XOPA_MAP_STR);
+  emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[7], THREEDNOW_MAP_STR);
 }
 
 void DisassemblerTables::emit(raw_ostream &o) const {
@@ -1075,14 +1085,9 @@ void DisassemblerTables::setTableFields(ModRMDecision     &decision,
 
         if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" ||
                                            newInfo.name == "XCHG32ar" ||
-                                           newInfo.name == "XCHG32ar64" ||
                                            newInfo.name == "XCHG64ar"))
           continue; // special case for XCHG*ar and NOOP
 
-        if (previousInfo.name == "DATA16_PREFIX" &&
-            newInfo.name == "DATA32_PREFIX")
-          continue; // special case for data16 and data32
-
         if (outranks(previousInfo.insnContext, newInfo.insnContext))
           continue;
 
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index 552bbe95f7cd..b0ea9c2e8625 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -41,7 +41,8 @@ private:
   /// [4] XOP8 map opcode
   /// [5] XOP9 map opcode
   /// [6] XOPA map opcode
-  ContextDecision* Tables[7];
+  /// [7] 3dnow map opcode
+  std::unique_ptr<ContextDecision> Tables[8];
 
   // Table of ModRM encodings.
   typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy;
diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 05f30facd547..d5dc10ecad25 100644
--- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -12,7 +12,6 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenDAGPatterns.h"
 #include "CodeGenTarget.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/TableGenBackend.h"
@@ -22,6 +21,7 @@ using namespace llvm;
 namespace {
 
 class X86EVEX2VEXTablesEmitter {
+  RecordKeeper &Records;
   CodeGenTarget Target;
 
   // Hold all non-masked & non-broadcasted EVEX encoded instructions
@@ -36,15 +36,8 @@ class X86EVEX2VEXTablesEmitter {
   std::vector<Entry> EVEX2VEX128;
   std::vector<Entry> EVEX2VEX256;
 
-  // Represents a manually added entry to the tables
-  struct ManualEntry {
-    const char *EVEXInstStr;
-    const char *VEXInstStr;
-    bool Is128Bit;
-  };
-
 public:
-  X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Target(R) {}
+  X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Output X86 EVEX2VEX tables.
   void run(raw_ostream &OS);
@@ -53,36 +46,11 @@ private:
   // Prints the given table as a C++ array of type
   // X86EvexToVexCompressTableEntry
   void printTable(const std::vector<Entry> &Table, raw_ostream &OS);
-
-  bool inExceptionList(const CodeGenInstruction *Inst) {
-    // List of EVEX instructions that match VEX instructions by the encoding
-    // but do not perform the same operation.
-    static constexpr const char *ExceptionList[] = {
-        "VCVTQQ2PD",
-        "VCVTQQ2PS",
-        "VPMAXSQ",
-        "VPMAXUQ",
-        "VPMINSQ",
-        "VPMINUQ",
-        "VPMULLQ",
-        "VPSRAQ",
-        "VDBPSADBW",
-        "VRNDSCALE",
-        "VSCALEFPS"
-    };
-    // Instruction's name starts with one of the entries in the exception list
-    for (StringRef InstStr : ExceptionList) {
-      if (Inst->TheDef->getName().startswith(InstStr))
-        return true;
-    }
-    return false;
-  }
-
 };
 
 void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
                                           raw_ostream &OS) {
-  std::string Size = (Table == EVEX2VEX128) ? "128" : "256";
+  StringRef Size = (Table == EVEX2VEX128) ? "128" : "256";
 
   OS << "// X86 EVEX encoded instructions that have a VEX " << Size
      << " encoding\n"
@@ -97,83 +65,6 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
        << ", X86::" << Pair.second->TheDef->getName() << " },\n";
   }
 
-  // Some VEX instructions were duplicated to multiple EVEX versions due the
-  // introduction of mask variants, and thus some of the EVEX versions have
-  // different encoding than the VEX instruction. In order to maximize the
-  // compression we add these entries manually.
-  static constexpr ManualEntry ManuallyAddedEntries[] = {
-      // EVEX-Inst            VEX-Inst           Is128-bit
-      {"VMOVDQU8Z128mr",      "VMOVDQUmr",       true},
-      {"VMOVDQU8Z128rm",      "VMOVDQUrm",       true},
-      {"VMOVDQU8Z128rr",      "VMOVDQUrr",       true},
-      {"VMOVDQU8Z128rr_REV",  "VMOVDQUrr_REV",   true},
-      {"VMOVDQU16Z128mr",     "VMOVDQUmr",       true},
-      {"VMOVDQU16Z128rm",     "VMOVDQUrm",       true},
-      {"VMOVDQU16Z128rr",     "VMOVDQUrr",       true},
-      {"VMOVDQU16Z128rr_REV", "VMOVDQUrr_REV",   true},
-      {"VMOVDQU8Z256mr",      "VMOVDQUYmr",      false},
-      {"VMOVDQU8Z256rm",      "VMOVDQUYrm",      false},
-      {"VMOVDQU8Z256rr",      "VMOVDQUYrr",      false},
-      {"VMOVDQU8Z256rr_REV",  "VMOVDQUYrr_REV",  false},
-      {"VMOVDQU16Z256mr",     "VMOVDQUYmr",      false},
-      {"VMOVDQU16Z256rm",     "VMOVDQUYrm",      false},
-      {"VMOVDQU16Z256rr",     "VMOVDQUYrr",      false},
-      {"VMOVDQU16Z256rr_REV", "VMOVDQUYrr_REV",  false},
-
-      {"VPERMILPDZ128mi",     "VPERMILPDmi",     true},
-      {"VPERMILPDZ128ri",     "VPERMILPDri",     true},
-      {"VPERMILPDZ128rm",     "VPERMILPDrm",     true},
-      {"VPERMILPDZ128rr",     "VPERMILPDrr",     true},
-      {"VPERMILPDZ256mi",     "VPERMILPDYmi",    false},
-      {"VPERMILPDZ256ri",     "VPERMILPDYri",    false},
-      {"VPERMILPDZ256rm",     "VPERMILPDYrm",    false},
-      {"VPERMILPDZ256rr",     "VPERMILPDYrr",    false},
-
-      {"VPBROADCASTQZ128m",   "VPBROADCASTQrm",  true},
-      {"VPBROADCASTQZ128r",   "VPBROADCASTQrr",  true},
-      {"VPBROADCASTQZ256m",   "VPBROADCASTQYrm", false},
-      {"VPBROADCASTQZ256r",   "VPBROADCASTQYrr", false},
-
-      {"VBROADCASTSDZ256m",   "VBROADCASTSDYrm", false},
-      {"VBROADCASTSDZ256r",   "VBROADCASTSDYrr", false},
-
-      {"VBROADCASTF64X2Z128rm", "VBROADCASTF128", false},
-      {"VBROADCASTI64X2Z128rm", "VBROADCASTI128", false},
-
-      {"VEXTRACTF64x2Z256mr", "VEXTRACTF128mr",  false},
-      {"VEXTRACTF64x2Z256rr", "VEXTRACTF128rr",  false},
-      {"VEXTRACTI64x2Z256mr", "VEXTRACTI128mr",  false},
-      {"VEXTRACTI64x2Z256rr", "VEXTRACTI128rr",  false},
-
-      {"VINSERTF64x2Z256rm",  "VINSERTF128rm",   false},
-      {"VINSERTF64x2Z256rr",  "VINSERTF128rr",   false},
-      {"VINSERTI64x2Z256rm",  "VINSERTI128rm",   false},
-      {"VINSERTI64x2Z256rr",  "VINSERTI128rr",   false},
-
-      // These will require some custom adjustment in the conversion pass.
-      {"VALIGNDZ128rri",      "VPALIGNRrri",     true},
-      {"VALIGNQZ128rri",      "VPALIGNRrri",     true},
-      {"VALIGNDZ128rmi",      "VPALIGNRrmi",     true},
-      {"VALIGNQZ128rmi",      "VPALIGNRrmi",     true},
-      {"VSHUFF32X4Z256rmi",   "VPERM2F128rm",    false},
-      {"VSHUFF32X4Z256rri",   "VPERM2F128rr",    false},
-      {"VSHUFF64X2Z256rmi",   "VPERM2F128rm",    false},
-      {"VSHUFF64X2Z256rri",   "VPERM2F128rr",    false},
-      {"VSHUFI32X4Z256rmi",   "VPERM2I128rm",    false},
-      {"VSHUFI32X4Z256rri",   "VPERM2I128rr",    false},
-      {"VSHUFI64X2Z256rmi",   "VPERM2I128rm",    false},
-      {"VSHUFI64X2Z256rri",   "VPERM2I128rr",    false},
-  };
-
-  // Print the manually added entries
-  for (const ManualEntry &Entry : ManuallyAddedEntries) {
-    if ((Table == EVEX2VEX128 && Entry.Is128Bit) ||
-        (Table == EVEX2VEX256 && !Entry.Is128Bit)) {
-      OS << "  { X86::" << Entry.EVEXInstStr << ", X86::" << Entry.VEXInstStr
-         << " },\n";
-    }
-  }
-
   OS << "};\n\n";
 }
 
@@ -210,31 +101,34 @@ static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
 // Function object - Operator() returns true if the given VEX instruction
 // matches the EVEX instruction of this object.
 class IsMatch {
-  const CodeGenInstruction *Inst;
+  const CodeGenInstruction *EVEXInst;
 
 public:
-  IsMatch(const CodeGenInstruction *Inst) : Inst(Inst) {}
+  IsMatch(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {}
 
-  bool operator()(const CodeGenInstruction *Inst2) {
-    Record *Rec1 = Inst->TheDef;
-    Record *Rec2 = Inst2->TheDef;
-    uint64_t Rec1WVEX =
-        getValueFromBitsInit(Rec1->getValueAsBitsInit("VEX_WPrefix"));
-    uint64_t Rec2WVEX =
-        getValueFromBitsInit(Rec2->getValueAsBitsInit("VEX_WPrefix"));
+  bool operator()(const CodeGenInstruction *VEXInst) {
+    Record *RecE = EVEXInst->TheDef;
+    Record *RecV = VEXInst->TheDef;
+    uint64_t EVEX_W =
+        getValueFromBitsInit(RecE->getValueAsBitsInit("VEX_WPrefix"));
+    uint64_t VEX_W =
+        getValueFromBitsInit(RecV->getValueAsBitsInit("VEX_WPrefix"));
 
-    if (Rec2->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
+    if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
         // VEX/EVEX fields
-        Rec2->getValueAsDef("OpPrefix") != Rec1->getValueAsDef("OpPrefix") ||
-        Rec2->getValueAsDef("OpMap") != Rec1->getValueAsDef("OpMap") ||
-        Rec2->getValueAsBit("hasVEX_4V") != Rec1->getValueAsBit("hasVEX_4V") ||
-        !equalBitsInits(Rec2->getValueAsBitsInit("EVEX_LL"),
-                        Rec1->getValueAsBitsInit("EVEX_LL")) ||
-        (Rec1WVEX != 2 && Rec2WVEX != 2 && Rec1WVEX != Rec2WVEX) ||
+        RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") ||
+        RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") ||
+        RecV->getValueAsBit("hasVEX_4V") != RecE->getValueAsBit("hasVEX_4V") ||
+        !equalBitsInits(RecV->getValueAsBitsInit("EVEX_LL"),
+                        RecE->getValueAsBitsInit("EVEX_LL")) ||
+        // Match is allowed if either is VEX_WIG, or they match, or EVEX
+        // is VEX_W1X and VEX is VEX_W0.
+        (!(EVEX_W == 2 || VEX_W == 2 || EVEX_W == VEX_W ||
+           (EVEX_W == 3 && VEX_W == 0))) ||
         // Instruction's format
-        Rec2->getValueAsDef("Form") != Rec1->getValueAsDef("Form") ||
-        Rec2->getValueAsBit("isAsmParserOnly") !=
-            Rec1->getValueAsBit("isAsmParserOnly"))
+        RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form") ||
+        RecV->getValueAsBit("isAsmParserOnly") !=
+            RecE->getValueAsBit("isAsmParserOnly"))
       return false;
 
     // This is needed for instructions with intrinsic version (_Int).
@@ -243,9 +137,9 @@ public:
     // Also for instructions that their EVEX version was upgraded to work with
     // k-registers. For example VPCMPEQBrm (xmm output register) and
     // VPCMPEQBZ128rm (k register output register).
-    for (unsigned i = 0; i < Inst->Operands.size(); i++) {
-      Record *OpRec1 = Inst->Operands[i].Rec;
-      Record *OpRec2 = Inst2->Operands[i].Rec;
+    for (unsigned i = 0, e = EVEXInst->Operands.size(); i < e; i++) {
+      Record *OpRec1 = EVEXInst->Operands[i].Rec;
+      Record *OpRec2 = VEXInst->Operands[i].Rec;
 
       if (OpRec1 == OpRec2)
         continue;
@@ -315,7 +209,7 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
              !Inst->TheDef->getValueAsBit("hasEVEX_B") &&
              getValueFromBitsInit(Inst->TheDef->
                                         getValueAsBitsInit("EVEX_LL")) != 2 &&
-             !inExceptionList(Inst))
+             !Inst->TheDef->getValueAsBit("notEVEX2VEXConvertible"))
       EVEXInsts.push_back(Inst);
   }
 
@@ -324,22 +218,34 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
                                            getValueAsBitsInit("Opcode"));
     // For each EVEX instruction look for a VEX match in the appropriate vector
     // (instructions with the same opcode) using function object IsMatch.
-    auto Match = llvm::find_if(VEXInsts[Opcode], IsMatch(EVEXInst));
-    if (Match != VEXInsts[Opcode].end()) {
-      const CodeGenInstruction *VEXInst = *Match;
-
-      // In case a match is found add new entry to the appropriate table
-      switch (getValueFromBitsInit(
-          EVEXInst->TheDef->getValueAsBitsInit("EVEX_LL"))) {
-      case 0:
-        EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
-        break;
-      case 1:
-        EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1}
-        break;
-      default:
-        llvm_unreachable("Instruction's size not fit for the mapping!");
-      }
+    // Allow EVEX2VEXOverride to explicitly specify a match.
+    const CodeGenInstruction *VEXInst = nullptr;
+    if (!EVEXInst->TheDef->isValueUnset("EVEX2VEXOverride")) {
+      StringRef AltInstStr =
+        EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride");
+      Record *AltInstRec = Records.getDef(AltInstStr);
+      assert(AltInstRec && "EVEX2VEXOverride instruction not found!");
+      VEXInst = &Target.getInstruction(AltInstRec);
+    } else {
+      auto Match = llvm::find_if(VEXInsts[Opcode], IsMatch(EVEXInst));
+      if (Match != VEXInsts[Opcode].end())
+        VEXInst = *Match;
+    }
+
+    if (!VEXInst)
+      continue;
+
+    // In case a match is found add new entry to the appropriate table
+    switch (getValueFromBitsInit(
+        EVEXInst->TheDef->getValueAsBitsInit("EVEX_LL"))) {
+    case 0:
+      EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
+      break;
+    case 1:
+      EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1}
+      break;
+    default:
+      llvm_unreachable("Instruction's size not fit for the mapping!");
     }
   }
 
diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp
index ff1afa89efc8..1ea668643575 100644
--- a/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -47,7 +47,9 @@ const char *ExplicitAlign[] = {"MOVDQA",  "MOVAPS",  "MOVAPD",  "MOVNTPS",
                                "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
 
 // List of instructions NOT requiring explicit memory alignment.
-const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD"};
+const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
+                                 "PCMPESTRM", "PCMPESTRI",
+                                 "PCMPISTRM", "PCMPISTRI" };
 
 // For manually mapping instructions that do not match by their encoding.
 const ManualMapEntry ManualMapSet[] = {
@@ -63,9 +65,9 @@ const ManualMapEntry ManualMapSet[] = {
     { "ADD16rr_DB",       "ADD16rm",         NO_UNFOLD  },
     { "ADD32rr_DB",       "ADD32rm",         NO_UNFOLD  },
     { "ADD64rr_DB",       "ADD64rm",         NO_UNFOLD  },
-    { "PUSH16r",          "PUSH16rmm",       NO_UNFOLD  },
-    { "PUSH32r",          "PUSH32rmm",       NO_UNFOLD  },
-    { "PUSH64r",          "PUSH64rmm",       NO_UNFOLD  },
+    { "PUSH16r",          "PUSH16rmm",       UNFOLD },
+    { "PUSH32r",          "PUSH32rmm",       UNFOLD },
+    { "PUSH64r",          "PUSH64rmm",       UNFOLD },
     { "TAILJMPr",         "TAILJMPm",        UNFOLD },
     { "TAILJMPr64",       "TAILJMPm64",      UNFOLD },
     { "TAILJMPr64_REX",   "TAILJMPm64_REX",  UNFOLD },
@@ -106,8 +108,8 @@ class X86FoldTablesEmitter {
 
     friend raw_ostream &operator<<(raw_ostream &OS,
                                    const X86FoldTableEntry &E) {
-      OS << "{ X86::" << E.RegInst->TheDef->getName().str()
-         << ", X86::" << E.MemInst->TheDef->getName().str() << ", ";
+      OS << "{ X86::" << E.RegInst->TheDef->getName()
+         << ", X86::" << E.MemInst->TheDef->getName() << ", ";
 
       if (E.IsLoad)
         OS << "TB_FOLDED_LOAD | ";
@@ -157,7 +159,7 @@ private:
 
   // Print the given table as a static const C++ array of type
   // X86MemoryFoldTableEntry.
-  void printTable(const FoldTable &Table, std::string TableName,
+  void printTable(const FoldTable &Table, StringRef TableName,
                   raw_ostream &OS) {
     OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
        << "[] = {\n";
@@ -251,16 +253,6 @@ getMemOperandSize(const Record *MemRec, const bool IntrinsicSensitive = false) {
   llvm_unreachable("Memory operand's size not known!");
 }
 
-// Returns true if the record's list of defs includes the given def.
-static inline bool hasDefInList(const Record *Rec, const StringRef List,
-                                const StringRef Def) {
-  if (!Rec->isValueUnset(List)) {
-    return any_of(*(Rec->getValueAsListInit(List)),
-                  [Def](const Init *I) { return I->getAsString() == Def; });
-  }
-  return false;
-}
-
 // Return true if the instruction defined as a register flavor.
 static inline bool hasRegisterFormat(const Record *Inst) {
   const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
@@ -335,20 +327,24 @@ public:
             MemRec->getValueAsDef("OpPrefix") ||
         RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") ||
         RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") ||
+        RegRec->getValueAsDef("AdSize") != MemRec->getValueAsDef("AdSize") ||
         RegRec->getValueAsBit("hasVEX_4V") !=
             MemRec->getValueAsBit("hasVEX_4V") ||
         RegRec->getValueAsBit("hasEVEX_K") !=
             MemRec->getValueAsBit("hasEVEX_K") ||
         RegRec->getValueAsBit("hasEVEX_Z") !=
             MemRec->getValueAsBit("hasEVEX_Z") ||
-        RegRec->getValueAsBit("hasEVEX_B") !=
-            MemRec->getValueAsBit("hasEVEX_B") ||
+        // EVEX_B means different things for memory and register forms.
+        RegRec->getValueAsBit("hasEVEX_B") != 0 ||
+        MemRec->getValueAsBit("hasEVEX_B") != 0 ||
         RegRec->getValueAsBit("hasEVEX_RC") !=
             MemRec->getValueAsBit("hasEVEX_RC") ||
         RegRec->getValueAsBit("hasREX_WPrefix") !=
             MemRec->getValueAsBit("hasREX_WPrefix") ||
         RegRec->getValueAsBit("hasLockPrefix") !=
             MemRec->getValueAsBit("hasLockPrefix") ||
+        RegRec->getValueAsBit("hasNoTrackPrefix") !=
+            MemRec->getValueAsBit("hasNoTrackPrefix") ||
         !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"),
                         MemRec->getValueAsBitsInit("EVEX_LL")) ||
         !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"),
@@ -511,10 +507,8 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
   unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
   unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
 
-  // Instructions which have the WriteRMW value (Read-Modify-Write) should be
-  // added to Table2Addr.
-  if (hasDefInList(MemRec, "SchedRW", "WriteRMW") && MemOutSize != RegOutSize &&
-      MemInSize == RegInSize) {
+  // Instructions which Read-Modify-Write should be added to Table2Addr.
+  if (MemOutSize != RegOutSize && MemInSize == RegInSize) {
     addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0);
     return;
   }
@@ -548,7 +542,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
     }
   } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
     // Store-Folding cases.
-    // If the memory form instruction performs performs a store, the *output*
+    // If the memory form instruction performs a store, the *output*
     // register of the register form instructions disappear and instead a
     // memory *input* operand appears in the memory form instruction.
     // For example:
@@ -556,7 +550,8 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
     //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
     Record *RegOpRec = RegInstr->Operands[RegOutSize - 1].Rec;
     Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec;
-    if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec))
+    if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
+        getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
       addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
   }
 
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 9afdd7e09638..efd5c195d02b 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -40,7 +40,7 @@ static uint8_t byteFromBitsInit(BitsInit &init) {
   uint8_t ret = 0;
 
   for (index = 0; index < width; index++) {
-    if (static_cast<BitInit*>(init.getBit(index))->getValue())
+    if (cast<BitInit>(init.getBit(index))->getValue())
       ret |= mask;
 
     mask <<= 1;
@@ -80,19 +80,19 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
   Form     = byteFromRec(Rec, "FormBits");
   Encoding = byteFromRec(Rec, "OpEncBits");
 
-  OpSize           = byteFromRec(Rec, "OpSizeBits");
-  AdSize           = byteFromRec(Rec, "AdSizeBits");
-  HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix");
-  HasVEX_4V        = Rec->getValueAsBit("hasVEX_4V");
-  VEX_WPrefix      = byteFromRec(Rec,"VEX_WPrefix");
-  IgnoresVEX_L     = Rec->getValueAsBit("ignoresVEX_L");
-  HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2");
-  HasEVEX_K        = Rec->getValueAsBit("hasEVEX_K");
-  HasEVEX_KZ       = Rec->getValueAsBit("hasEVEX_Z");
-  HasEVEX_B        = Rec->getValueAsBit("hasEVEX_B");
-  IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly");
-  ForceDisassemble = Rec->getValueAsBit("ForceDisassemble");
-  CD8_Scale        = byteFromRec(Rec, "CD8_Scale");
+  OpSize             = byteFromRec(Rec, "OpSizeBits");
+  AdSize             = byteFromRec(Rec, "AdSizeBits");
+  HasREX_WPrefix     = Rec->getValueAsBit("hasREX_WPrefix");
+  HasVEX_4V          = Rec->getValueAsBit("hasVEX_4V");
+  VEX_WPrefix        = byteFromRec(Rec,"VEX_WPrefix");
+  IgnoresVEX_L       = Rec->getValueAsBit("ignoresVEX_L");
+  HasEVEX_L2Prefix   = Rec->getValueAsBit("hasEVEX_L2");
+  HasEVEX_K          = Rec->getValueAsBit("hasEVEX_K");
+  HasEVEX_KZ         = Rec->getValueAsBit("hasEVEX_Z");
+  HasEVEX_B          = Rec->getValueAsBit("hasEVEX_B");
+  IsCodeGenOnly      = Rec->getValueAsBit("isCodeGenOnly");
+  ForceDisassemble   = Rec->getValueAsBit("ForceDisassemble");
+  CD8_Scale          = byteFromRec(Rec, "CD8_Scale");
 
   Name      = Rec->getName();
 
@@ -164,7 +164,8 @@ InstructionContext RecognizableInstr::insnContext() const {
       llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
     }
     // VEX_L & VEX_W
-    if (!EncodeRC && HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) {
+    if (!EncodeRC && HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
+                                        VEX_WPrefix == X86Local::VEX_W1X)) {
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
       else if (OpPrefix == X86Local::XS)
@@ -192,7 +193,8 @@ InstructionContext RecognizableInstr::insnContext() const {
         llvm_unreachable("Invalid prefix");
       }
     } else if (!EncodeRC && HasEVEX_L2Prefix &&
-               VEX_WPrefix == X86Local::VEX_W1) {
+               (VEX_WPrefix == X86Local::VEX_W1 ||
+                VEX_WPrefix == X86Local::VEX_W1X)) {
       // EVEX_L2 & VEX_W
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE);
@@ -221,7 +223,8 @@ InstructionContext RecognizableInstr::insnContext() const {
         llvm_unreachable("Invalid prefix");
       }
     }
-    else if (VEX_WPrefix == X86Local::VEX_W1) {
+    else if (VEX_WPrefix == X86Local::VEX_W1 ||
+             VEX_WPrefix == X86Local::VEX_W1X) {
       // VEX_W
       if (OpPrefix == X86Local::PD)
         insnContext = EVEX_KB(IC_EVEX_W_OPSIZE);
@@ -243,11 +246,16 @@ InstructionContext RecognizableInstr::insnContext() const {
       insnContext = EVEX_KB(IC_EVEX_XD);
     else if (OpPrefix == X86Local::XS)
       insnContext = EVEX_KB(IC_EVEX_XS);
-    else
+    else if (OpPrefix == X86Local::PS)
       insnContext = EVEX_KB(IC_EVEX);
+    else {
+      errs() << "Instruction does not use a prefix: " << Name << "\n";
+      llvm_unreachable("Invalid prefix");
+    }
     /// eof EVEX
   } else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) {
-    if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) {
+    if (HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
+                           VEX_WPrefix == X86Local::VEX_W1X)) {
       if (OpPrefix == X86Local::PD)
         insnContext = IC_VEX_L_W_OPSIZE;
       else if (OpPrefix == X86Local::XS)
@@ -262,7 +270,8 @@ InstructionContext RecognizableInstr::insnContext() const {
       }
     } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix)
       insnContext = IC_VEX_L_OPSIZE;
-    else if (OpPrefix == X86Local::PD && VEX_WPrefix == X86Local::VEX_W1)
+    else if (OpPrefix == X86Local::PD && (VEX_WPrefix == X86Local::VEX_W1 ||
+                                          VEX_WPrefix == X86Local::VEX_W1X))
       insnContext = IC_VEX_W_OPSIZE;
     else if (OpPrefix == X86Local::PD)
       insnContext = IC_VEX_OPSIZE;
@@ -270,11 +279,14 @@ InstructionContext RecognizableInstr::insnContext() const {
       insnContext = IC_VEX_L_XS;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::XD)
       insnContext = IC_VEX_L_XD;
-    else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XS)
+    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
+              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XS)
       insnContext = IC_VEX_W_XS;
-    else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XD)
+    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
+              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XD)
       insnContext = IC_VEX_W_XD;
-    else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::PS)
+    else if ((VEX_WPrefix == X86Local::VEX_W1 ||
+              VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::PS)
       insnContext = IC_VEX_W;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::PS)
       insnContext = IC_VEX_L;
@@ -297,6 +309,8 @@ InstructionContext RecognizableInstr::insnContext() const {
       insnContext = IC_64BIT_XD_OPSIZE;
     else if (OpSize == X86Local::OpSize16 && OpPrefix == X86Local::XS)
       insnContext = IC_64BIT_XS_OPSIZE;
+    else if (AdSize == X86Local::AdSize32 && OpPrefix == X86Local::PD)
+      insnContext = IC_64BIT_OPSIZE_ADSIZE;
     else if (OpSize == X86Local::OpSize16 && AdSize == X86Local::AdSize32)
       insnContext = IC_64BIT_OPSIZE_ADSIZE;
     else if (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD)
@@ -320,6 +334,12 @@ InstructionContext RecognizableInstr::insnContext() const {
       insnContext = IC_XD_OPSIZE;
     else if (OpSize == X86Local::OpSize16 && OpPrefix == X86Local::XS)
       insnContext = IC_XS_OPSIZE;
+    else if (AdSize == X86Local::AdSize16 && OpPrefix == X86Local::XD)
+      insnContext = IC_XD_ADSIZE;
+    else if (AdSize == X86Local::AdSize16 && OpPrefix == X86Local::XS)
+      insnContext = IC_XS_ADSIZE;
+    else if (AdSize == X86Local::AdSize16 && OpPrefix == X86Local::PD)
+      insnContext = IC_OPSIZE_ADSIZE;
     else if (OpSize == X86Local::OpSize16 && AdSize == X86Local::AdSize16)
       insnContext = IC_OPSIZE_ADSIZE;
     else if (OpSize == X86Local::OpSize16 || OpPrefix == X86Local::PD)
@@ -544,7 +564,6 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(rmRegister)
     HANDLE_OPTIONAL(immediate)
     HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
-    HANDLE_OPTIONAL(immediate)
     break;
   case X86Local::MRMSrcReg4VOp3:
     assert(numPhysicalOperands == 3 &&
@@ -663,41 +682,15 @@ void RecognizableInstr::emitInstructionSpecifier() {
     HANDLE_OPERAND(immediate)
     HANDLE_OPERAND(immediate)
     break;
-  case X86Local::MRM_F8:
-    if (Opcode == 0xc6) {
-      assert(numPhysicalOperands == 1 &&
-             "Unexpected number of operands for X86Local::MRM_F8");
-      HANDLE_OPERAND(immediate)
-    } else if (Opcode == 0xc7) {
-      assert(numPhysicalOperands == 1 &&
-             "Unexpected number of operands for X86Local::MRM_F8");
-      HANDLE_OPERAND(relocation)
-    }
-    break;
-  case X86Local::MRM_C0: case X86Local::MRM_C1: case X86Local::MRM_C2:
-  case X86Local::MRM_C3: case X86Local::MRM_C4: case X86Local::MRM_C8:
-  case X86Local::MRM_C9: case X86Local::MRM_CA: case X86Local::MRM_CB:
-  case X86Local::MRM_CF: case X86Local::MRM_D0: case X86Local::MRM_D1:
-  case X86Local::MRM_D4: case X86Local::MRM_D5: case X86Local::MRM_D6:
-  case X86Local::MRM_D7: case X86Local::MRM_D8: case X86Local::MRM_D9:
-  case X86Local::MRM_DA: case X86Local::MRM_DB: case X86Local::MRM_DC:
-  case X86Local::MRM_DD: case X86Local::MRM_DE: case X86Local::MRM_DF:
-  case X86Local::MRM_E0: case X86Local::MRM_E1: case X86Local::MRM_E2:
-  case X86Local::MRM_E3: case X86Local::MRM_E4: case X86Local::MRM_E5:
-  case X86Local::MRM_E8: case X86Local::MRM_E9: case X86Local::MRM_EA:
-  case X86Local::MRM_EB: case X86Local::MRM_EC: case X86Local::MRM_ED:
-  case X86Local::MRM_EE: case X86Local::MRM_EF: case X86Local::MRM_F0:
-  case X86Local::MRM_F1: case X86Local::MRM_F2: case X86Local::MRM_F3:
-  case X86Local::MRM_F4: case X86Local::MRM_F5: case X86Local::MRM_F6:
-  case X86Local::MRM_F7: case X86Local::MRM_F9: case X86Local::MRM_FA:
-  case X86Local::MRM_FB: case X86Local::MRM_FC: case X86Local::MRM_FD:
-  case X86Local::MRM_FE: case X86Local::MRM_FF:
-    // Ignored.
+#define MAP(from, to) case X86Local::MRM_##from:
+  X86_INSTR_MRM_MAPPING
+#undef MAP
+    HANDLE_OPTIONAL(relocation)
     break;
   }
 
-  #undef HANDLE_OPERAND
-  #undef HANDLE_OPTIONAL
+#undef HANDLE_OPERAND
+#undef HANDLE_OPTIONAL
 }
 
 void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
@@ -707,77 +700,64 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
   case X86Local::MRM_##from:
 
   llvm::Optional<OpcodeType> opcodeType;
-
-  ModRMFilter*  filter      = nullptr;
-  uint8_t       opcodeToSet = 0;
-
   switch (OpMap) {
   default: llvm_unreachable("Invalid map!");
-  case X86Local::OB:
-  case X86Local::TB:
-  case X86Local::T8:
-  case X86Local::TA:
-  case X86Local::XOP8:
-  case X86Local::XOP9:
-  case X86Local::XOPA:
-    switch (OpMap) {
-    default: llvm_unreachable("Unexpected map!");
-    case X86Local::OB:   opcodeType = ONEBYTE;      break;
-    case X86Local::TB:   opcodeType = TWOBYTE;      break;
-    case X86Local::T8:   opcodeType = THREEBYTE_38; break;
-    case X86Local::TA:   opcodeType = THREEBYTE_3A; break;
-    case X86Local::XOP8: opcodeType = XOP8_MAP;     break;
-    case X86Local::XOP9: opcodeType = XOP9_MAP;     break;
-    case X86Local::XOPA: opcodeType = XOPA_MAP;     break;
-    }
-
-    switch (Form) {
-    default: llvm_unreachable("Invalid form!");
-    case X86Local::Pseudo: llvm_unreachable("Pseudo should not be emitted!");
-    case X86Local::RawFrm:
-    case X86Local::AddRegFrm:
-    case X86Local::RawFrmMemOffs:
-    case X86Local::RawFrmSrc:
-    case X86Local::RawFrmDst:
-    case X86Local::RawFrmDstSrc:
-    case X86Local::RawFrmImm8:
-    case X86Local::RawFrmImm16:
-      filter = new DumbFilter();
-      break;
-    case X86Local::MRMDestReg:
-    case X86Local::MRMSrcReg:
-    case X86Local::MRMSrcReg4VOp3:
-    case X86Local::MRMSrcRegOp4:
-    case X86Local::MRMXr:
-      filter = new ModFilter(true);
-      break;
-    case X86Local::MRMDestMem:
-    case X86Local::MRMSrcMem:
-    case X86Local::MRMSrcMem4VOp3:
-    case X86Local::MRMSrcMemOp4:
-    case X86Local::MRMXm:
-      filter = new ModFilter(false);
-      break;
-    case X86Local::MRM0r:      case X86Local::MRM1r:
-    case X86Local::MRM2r:      case X86Local::MRM3r:
-    case X86Local::MRM4r:      case X86Local::MRM5r:
-    case X86Local::MRM6r:      case X86Local::MRM7r:
-      filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
-      break;
-    case X86Local::MRM0m:      case X86Local::MRM1m:
-    case X86Local::MRM2m:      case X86Local::MRM3m:
-    case X86Local::MRM4m:      case X86Local::MRM5m:
-    case X86Local::MRM6m:      case X86Local::MRM7m:
-      filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
-      break;
-    X86_INSTR_MRM_MAPPING
-      filter = new ExactFilter(0xC0 + Form - X86Local::MRM_C0);   \
-      break;
-    } // switch (Form)
+  case X86Local::OB:        opcodeType = ONEBYTE;       break;
+  case X86Local::TB:        opcodeType = TWOBYTE;       break;
+  case X86Local::T8:        opcodeType = THREEBYTE_38;  break;
+  case X86Local::TA:        opcodeType = THREEBYTE_3A;  break;
+  case X86Local::XOP8:      opcodeType = XOP8_MAP;      break;
+  case X86Local::XOP9:      opcodeType = XOP9_MAP;      break;
+  case X86Local::XOPA:      opcodeType = XOPA_MAP;      break;
+  case X86Local::ThreeDNow: opcodeType = THREEDNOW_MAP; break;
+  }
 
-    opcodeToSet = Opcode;
+  std::unique_ptr<ModRMFilter> filter;
+  switch (Form) {
+  default: llvm_unreachable("Invalid form!");
+  case X86Local::Pseudo: llvm_unreachable("Pseudo should not be emitted!");
+  case X86Local::RawFrm:
+  case X86Local::AddRegFrm:
+  case X86Local::RawFrmMemOffs:
+  case X86Local::RawFrmSrc:
+  case X86Local::RawFrmDst:
+  case X86Local::RawFrmDstSrc:
+  case X86Local::RawFrmImm8:
+  case X86Local::RawFrmImm16:
+    filter = llvm::make_unique<DumbFilter>();
     break;
-  } // switch (OpMap)
+  case X86Local::MRMDestReg:
+  case X86Local::MRMSrcReg:
+  case X86Local::MRMSrcReg4VOp3:
+  case X86Local::MRMSrcRegOp4:
+  case X86Local::MRMXr:
+    filter = llvm::make_unique<ModFilter>(true);
+    break;
+  case X86Local::MRMDestMem:
+  case X86Local::MRMSrcMem:
+  case X86Local::MRMSrcMem4VOp3:
+  case X86Local::MRMSrcMemOp4:
+  case X86Local::MRMXm:
+    filter = llvm::make_unique<ModFilter>(false);
+    break;
+  case X86Local::MRM0r: case X86Local::MRM1r:
+  case X86Local::MRM2r: case X86Local::MRM3r:
+  case X86Local::MRM4r: case X86Local::MRM5r:
+  case X86Local::MRM6r: case X86Local::MRM7r:
+    filter = llvm::make_unique<ExtendedFilter>(true, Form - X86Local::MRM0r);
+    break;
+  case X86Local::MRM0m: case X86Local::MRM1m:
+  case X86Local::MRM2m: case X86Local::MRM3m:
+  case X86Local::MRM4m: case X86Local::MRM5m:
+  case X86Local::MRM6m: case X86Local::MRM7m:
+    filter = llvm::make_unique<ExtendedFilter>(false, Form - X86Local::MRM0m);
+    break;
+  X86_INSTR_MRM_MAPPING
+    filter = llvm::make_unique<ExactFilter>(0xC0 + Form - X86Local::MRM_C0);
+    break;
+  } // switch (Form)
+
+  uint8_t opcodeToSet = Opcode;
 
   unsigned AddressSize = 0;
   switch (AdSize) {
@@ -808,8 +788,6 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
                           VEX_WPrefix == X86Local::VEX_WIG, AddressSize);
   }
 
-  delete filter;
-
 #undef MAP
 }
 
@@ -884,10 +862,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("VR64",                TYPE_MM64)
   TYPE("i64imm",              TYPE_IMM)
   TYPE("anymem",              TYPE_M)
-  TYPE("opaque32mem",         TYPE_M)
-  TYPE("opaque48mem",         TYPE_M)
-  TYPE("opaque80mem",         TYPE_M)
-  TYPE("opaque512mem",        TYPE_M)
+  TYPE("opaquemem",           TYPE_M)
   TYPE("SEGMENT_REG",         TYPE_SEGMENTREG)
   TYPE("DEBUG_REG",           TYPE_DEBUGREG)
   TYPE("CONTROL_REG",         TYPE_CONTROLREG)
@@ -927,7 +902,6 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("VK32WM",              TYPE_VK)
   TYPE("VK64",                TYPE_VK)
   TYPE("VK64WM",              TYPE_VK)
-  TYPE("GR32_NOAX",           TYPE_Rv)
   TYPE("vx64mem",             TYPE_MVSIBX)
   TYPE("vx128mem",            TYPE_MVSIBX)
   TYPE("vx256mem",            TYPE_MVSIBX)
@@ -938,8 +912,8 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("vx256xmem",           TYPE_MVSIBX)
   TYPE("vy128xmem",           TYPE_MVSIBY)
   TYPE("vy256xmem",           TYPE_MVSIBY)
-  TYPE("vy512mem",            TYPE_MVSIBY)
-  TYPE("vz256xmem",           TYPE_MVSIBZ)
+  TYPE("vy512xmem",           TYPE_MVSIBY)
+  TYPE("vz256mem",            TYPE_MVSIBZ)
   TYPE("vz512mem",            TYPE_MVSIBZ)
   TYPE("BNDR",                TYPE_BNDR)
   errs() << "Unhandled type string " << s << "\n";
@@ -1120,10 +1094,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
   ENCODING("lea64_32mem",     ENCODING_RM)
   ENCODING("lea64mem",        ENCODING_RM)
   ENCODING("anymem",          ENCODING_RM)
-  ENCODING("opaque32mem",     ENCODING_RM)
-  ENCODING("opaque48mem",     ENCODING_RM)
-  ENCODING("opaque80mem",     ENCODING_RM)
-  ENCODING("opaque512mem",    ENCODING_RM)
+  ENCODING("opaquemem",       ENCODING_RM)
   ENCODING("vx64mem",         ENCODING_VSIB)
   ENCODING("vx128mem",        ENCODING_VSIB)
   ENCODING("vx256mem",        ENCODING_VSIB)
@@ -1134,8 +1105,8 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
   ENCODING("vx256xmem",       ENCODING_VSIB)
   ENCODING("vy128xmem",       ENCODING_VSIB)
   ENCODING("vy256xmem",       ENCODING_VSIB)
-  ENCODING("vy512mem",        ENCODING_VSIB)
-  ENCODING("vz256xmem",       ENCODING_VSIB)
+  ENCODING("vy512xmem",       ENCODING_VSIB)
+  ENCODING("vz256mem",        ENCODING_VSIB)
   ENCODING("vz512mem",        ENCODING_VSIB)
   errs() << "Unhandled memory encoding " << s << "\n";
   llvm_unreachable("Unhandled memory encoding");
@@ -1195,7 +1166,6 @@ RecognizableInstr::opcodeModifierEncodingFromString(const std::string &s,
   ENCODING("GR64",            ENCODING_RO)
   ENCODING("GR16",            ENCODING_Rv)
   ENCODING("GR8",             ENCODING_RB)
-  ENCODING("GR32_NOAX",       ENCODING_Rv)
   errs() << "Unhandled opcode modifier encoding " << s << "\n";
   llvm_unreachable("Unhandled opcode modifier encoding");
 }
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index 24509d16d638..c4d34ee6c80c 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -122,11 +122,11 @@ namespace X86Local {
   };
 
   enum {
-    OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6
+    OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7
   };
 
   enum {
-    PS = 1, PD = 2, XS = 3, XD = 4
+    PD = 1, XS = 2, XD = 3, PS = 4
   };
 
   enum {
@@ -142,7 +142,7 @@ namespace X86Local {
   };
 
   enum {
-    VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2
+    VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2, VEX_W1X = 3
   };
 }
 
@@ -210,12 +210,12 @@ private:
   /// Indicates whether the instruction should be emitted into the decode
   /// tables; regardless, it will be emitted into the instruction info table
   bool ShouldBeEmitted;
-  
+
   /// The operands of the instruction, as listed in the CodeGenInstruction.
   /// They are not one-to-one with operands listed in the MCInst; for example,
   /// memory operands expand to 5 operands in the MCInst
   const std::vector<CGIOperandList::OperandInfo>* Operands;
-  
+
   /// The description of the instruction that is emitted into the instruction
   /// info table
   InstructionSpecifier* Spec;
@@ -272,7 +272,7 @@ private:
   static OperandEncoding writemaskRegisterEncodingFromString(const std::string &s,
                                                              uint8_t OpSize);
 
-  /// \brief Adjust the encoding type for an operand based on the instruction.
+  /// Adjust the encoding type for an operand based on the instruction.
   void adjustOperandEncoding(OperandEncoding &encoding);
 
   /// handleOperand - Converts a single operand from the LLVM table format to
@@ -283,7 +283,7 @@ private:
   ///                               operand exists.
   /// @param operandIndex         - The index into the generated operand table.
   ///                               Incremented by this function one or more
-  ///                               times to reflect possible duplicate 
+  ///                               times to reflect possible duplicate
   ///                               operands).
   /// @param physicalOperandIndex - The index of the current operand into the
   ///                               set of non-duplicate ('physical') operands.
@@ -314,12 +314,12 @@ private:
   bool shouldBeEmitted() const {
     return ShouldBeEmitted;
   }
-  
+
   /// emitInstructionSpecifier - Loads the instruction specifier for the current
   ///   instruction into a DisassemblerTables.
   ///
   void emitInstructionSpecifier();
-  
+
   /// emitDecodePath - Populates the proper fields in the decode tables
   ///   corresponding to the decode paths for this instruction.
   ///
@@ -349,7 +349,7 @@ public:
                            const CodeGenInstruction &insn,
                            InstrUID uid);
 };
-  
+
 } // namespace X86Disassembler
 
 } // namespace llvm
diff --git a/utils/UpdateTestChecks/__init__.py b/utils/UpdateTestChecks/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/utils/UpdateTestChecks/__init__.py
diff --git a/utils/UpdateTestChecks/asm.py b/utils/UpdateTestChecks/asm.py
new file mode 100644
index 000000000000..726a653d1516
--- /dev/null
+++ b/utils/UpdateTestChecks/asm.py
@@ -0,0 +1,259 @@
+import re
+import sys
+
+from . import common
+
+if sys.version_info[0] > 2:
+  class string:
+    expandtabs = str.expandtabs
+else:
+  import string
+
+# Per-target regexes: each captures a function's name ('func') and body ('body').
+
+##### Assembly parser
+
+ASM_FUNCTION_X86_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n(?:\s*.Lfunc_begin[^:\n]*:\n)?[^:]*?'
+    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
+    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)',
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_ARM_RE = re.compile(
+        r'^(?P<func>[0-9a-zA-Z_]+):\n' # f: (name of function)
+        r'\s+\.fnstart\n' # .fnstart
+        r'(?P<body>.*?)\n' # (body of the function)
+        r'.Lfunc_end[0-9]+:', # .Lfunc_end0: (end-of-function label)
+        flags=(re.M | re.S))
+
+ASM_FUNCTION_AARCH64_RE = re.compile(
+     r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
+     r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise
+     r'(?P<body>.*?)\n'
+     # This list is incomplete
+     r'.Lfunc_end[0-9]+:\n',
+     flags=(re.M | re.S))
+
+ASM_FUNCTION_AMDGPU_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*;+[ \t]*@(?P=func)\n[^:]*?'
+    r'(?P<body>.*?)\n' # (body of the function)
+    # This list is incomplete
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_MIPS_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func)
+    r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
+    r'(?P<body>.*?)\n'                        # (body of the function)
+    r'(?:^[ \t]+\.(set|end).*?\n)+'           # Mips+LLVM standard asm epilogue
+    r'(\$|\.L)func_end[0-9]+:\n',             # $func_end0: (mips32 - O32) or
+                                              # .Lfunc_end0: (mips64 - NewABI)
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_PPC_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'\.Lfunc_begin[0-9]+:\n'
+    r'(?:[ \t]+.cfi_startproc\n)?'
+    r'(?:\.Lfunc_[gl]ep[0-9]+:\n(?:[ \t]+.*?\n)*)*'
+    r'(?P<body>.*?)\n'
+    # This list is incomplete
+    r'(?:^[ \t]*(?:\.long[ \t]+[^\n]+|\.quad[ \t]+[^\n]+)\n)*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_RISCV_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
+    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_SPARC_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@(?P=func)\n'
+    r'(?P<body>.*?)\s*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+ASM_FUNCTION_SYSTEMZ_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'[ \t]+.cfi_startproc\n'
+    r'(?P<body>.*?)\n'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+
+SCRUB_LOOP_COMMENT_RE = re.compile(
+    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+
+SCRUB_X86_SHUFFLES_RE = (
+    re.compile(
+        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
+        flags=re.M))
+SCRUB_X86_SPILL_RELOAD_RE = (
+    re.compile(
+        r'-?\d+\(%([er])[sb]p\)(.*(?:Spill|Reload))$',
+        flags=re.M))
+SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')  # offset(%esp) / offset(%rsp)
+SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')  # symbol(%rip)
+SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')  # .LCPI<n>_<m> labels
+SCRUB_X86_RET_RE = re.compile(r'ret[lq]')  # retl / retq ('|' is literal in a class)
+
+def scrub_asm_x86(asm, args):
+  """Return x86 assembly text with unstable details replaced by patterns.
+
+  The RIP and ret rewrites below are applied only when `args` carries a
+  true `x86_scrub_rip` / `extra_scrub` attribute, respectively.
+  """
+  asm = string.expandtabs(common.SCRUB_WHITESPACE_RE.sub(r' ', asm), 2)
+  rewrites = (
+      # Shuffle comments stand in for the operands they describe.
+      (True, SCRUB_X86_SHUFFLES_RE, r'\1 {{.*#+}} \2'),
+      # Spills/reloads: hide the offset and the stack-vs-frame pointer choice.
+      (True, SCRUB_X86_SPILL_RELOAD_RE, r'{{[-0-9]+}}(%\1{{[sb]}}p)\2'),
+      # Any other stack-pointer-relative offset.
+      (True, SCRUB_X86_SP_RE, r'{{[0-9]+}}(%\1)'),
+      (getattr(args, 'x86_scrub_rip', False), SCRUB_X86_RIP_RE, r'{{.*}}(%rip)'),
+      (True, SCRUB_X86_LCP_RE, r'{{\.LCPI.*}}'),
+      # 'retl' vs 'retq' would otherwise split 32- and 64-bit checks.
+      (getattr(args, 'extra_scrub', False), SCRUB_X86_RET_RE, r'ret{{[l|q]}}'),
+      # Kill comments go away entirely; trailing whitespace is stripped last.
+      (True, common.SCRUB_KILL_COMMENT_RE, ''),
+      (True, common.SCRUB_TRAILING_WHITESPACE_RE, r''),
+  )
+  for enabled, pattern, replacement in rewrites:
+    if enabled:
+      asm = pattern.sub(replacement, asm)
+  return asm
+
+def scrub_asm_amdgpu(asm, args):
+  """Normalize the whitespace of AMDGPU assembly text; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+def scrub_asm_arm_eabi(asm, args):
+  """Normalize whitespace and drop kill comments; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Remove the kill comments inserted into the asm.
+  without_kills = common.SCRUB_KILL_COMMENT_RE.sub('', detabbed)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', without_kills)
+
+def scrub_asm_powerpc64(asm, args):
+  """Normalize whitespace and drop loop comments; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Strip the '# =>This Inner Loop Header:' / '# in Loop:' annotations.
+  without_loops = SCRUB_LOOP_COMMENT_RE.sub(r'', detabbed)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', without_loops)
+
+def scrub_asm_mips(asm, args):
+  """Normalize the whitespace of MIPS assembly text; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+def scrub_asm_riscv(asm, args):
+  """Normalize the whitespace of RISC-V assembly text; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+def scrub_asm_sparc(asm, args):
+  """Normalize the whitespace of SPARC assembly text; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+def scrub_asm_systemz(asm, args):
+  """Normalize the whitespace of SystemZ assembly text; `args` is not used."""
+  # Collapse whitespace runs to one space; the pattern in common spares the
+  # leading whitespace of a line.
+  squeezed = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+
+def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, prefixes, func_dict):
+  """Fill func_dict from raw_tool_output using the handlers registered for triple."""
+  target_handlers = {
+      'x86_64': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
+      'i686': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
+      'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
+      'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
+      'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
+      'r600': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
+      'amdgcn': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
+      'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6t2': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6t2-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv6m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv7m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv7m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv8-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv8m.base': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv8m.main': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armeb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armv7eb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armv7eb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE),
+      'powerpc64': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
+      'powerpc64le': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
+      'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
+      'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
+      'sparc': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE),
+      'sparcv9': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE),
+      's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE),
+  }
+  # The first table key that is a prefix of the triple wins.
+  selected = next((pair for known, pair in target_handlers.items()
+                   if triple.startswith(known)), None)
+  if selected is None:
+    raise KeyError('Triple %r is not supported' % (triple))
+
+  # args is passed on as the scrubber's single extra argument.
+  scrubber, function_re = selected
+  common.build_function_body_dictionary(
+          function_re, scrubber, [args], raw_tool_output, prefixes,
+          func_dict, args.verbose)
+
+##### Generator of assembly CHECK lines
+
+def add_asm_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
+  # Asm labels have the form "<marker> <PREFIX>-LABEL: <function>:".
+  asm_label = '%s %%s-LABEL: %%s:' % (comment_marker,)
+  common.add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, asm_label, is_asm=True, is_analyze=False)
diff --git a/utils/UpdateTestChecks/common.py b/utils/UpdateTestChecks/common.py
new file mode 100644
index 000000000000..daea395e31fa
--- /dev/null
+++ b/utils/UpdateTestChecks/common.py
@@ -0,0 +1,266 @@
+from __future__ import print_function
+import re
+import string
+import subprocess
+import sys
+import copy
+
+if sys.version_info[0] > 2:
+  class string:
+    expandtabs = str.expandtabs
+else:
+  import string
+
+##### Common utilities for update_*test_checks.py
+
+def should_add_line_to_output(input_line, prefix_set):
+  """Decide whether a line of the original test file is kept in the output.
+
+  Returns False for a line consisting solely of ';' and for an existing
+  check line whose prefix is in prefix_set (we're building our own);
+  returns True for everything else, including completely blank lines.
+  """
+  if input_line.strip() == ';':
+    return False
+  check_match = CHECK_RE.match(input_line)
+  is_stale_check = bool(check_match) and check_match.group(1) in prefix_set
+  # Blank lines are deliberately not filtered here.
+  return not is_stale_check
+
+def invoke_tool(exe, cmd_args, ir):
+  """Run exe with the file ir on stdin and return its stdout.
+
+  A list cmd_args is executed directly; a str cmd_args is appended to exe
+  and run through the shell. TODO Remove the str form, which is used by
+  update_test_checks.py and update_llc_test_checks.py.
+  """
+  via_shell = not isinstance(cmd_args, list)
+  command = (exe + ' ' + cmd_args) if via_shell else [exe] + cmd_args
+  with open(ir) as ir_file:
+    stdout = subprocess.check_output(command, shell=via_shell, stdin=ir_file)
+  if sys.version_info[0] > 2:
+    stdout = stdout.decode()
+  # Normalize Windows CRLF line endings to Unix LF.
+  return stdout.replace('\r\n', '\n')
+
+##### LLVM IR parser
+
+RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')  # group 1: text after 'RUN:'
+CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')  # group 1: prefix list
+CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+
+OPT_FUNCTION_RE = re.compile(
+    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
+    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
+    flags=(re.M | re.S))
+
+ANALYZE_FUNCTION_RE = re.compile(
+    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
+    r'\s*\n(?P<body>.*)$',
+    flags=(re.X | re.S))
+
+IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')  # group 1: name
+TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
+TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
+MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
+
+SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
+SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
+SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
+SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
+SCRUB_LOOP_COMMENT_RE = re.compile(
+    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+
+def scrub_body(body):
+  """Return body with its whitespace normalized."""
+  # Collapse whitespace runs to one space; SCRUB_WHITESPACE_RE spares the
+  # leading whitespace of a line.
+  squeezed = SCRUB_WHITESPACE_RE.sub(r' ', body)
+  # Expand tabs to 2-column stops.
+  detabbed = string.expandtabs(squeezed, 2)
+  # Finally drop whitespace at the end of each line.
+  return SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
+
+def do_scrub(body, scrubber, scrubber_args, extra):  # scrub body; flag goes on a copy
+  if not scrubber_args:
+    return scrubber(body, *scrubber_args)
+  args_copy = copy.deepcopy(scrubber_args)  # leave the caller's objects untouched
+  args_copy[0].extra_scrub = extra
+  return scrubber(body, *args_copy)
+
+# Build up a dictionary of all the function bodies.
+class function_body(object):
+  """A scrubbed function body (what str() returns) plus its extra-scrubbed form."""
+  def __init__(self, string, extra):
+    self.scrub, self.extrascrub = string, extra
+  def __str__(self):
+    return self.scrub
+
+def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
+  # For every function function_re finds in raw_tool_output, store its
+  # scrubbed body in func_dict[prefix][func] for each of the given prefixes.
+  for m in function_re.finditer(raw_tool_output):
+    func = m.group('func')
+    body = m.group('body')
+    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
+    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
+    if 'analysis' in m.groupdict():  # dict.has_key() is Python 2 only
+      analysis = m.group('analysis')
+      if analysis.lower() != 'cost model analysis':
+        print('WARNING: Unsupported analysis mode: %r!' % (analysis,), file=sys.stderr)
+    if func.startswith('stress'):
+      # We only use the last line of the function body for stress tests.
+      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
+    if verbose:
+      print('Processing function: ' + func, file=sys.stderr)
+      for l in scrubbed_body.splitlines():
+        print('  ' + l, file=sys.stderr)
+    for prefix in prefixes:
+      if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
+        if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra:
+          func_dict[prefix][func].scrub = scrubbed_extra  # bodies agree once extra-scrubbed
+          continue
+        else:
+          if prefix == prefixes[-1]:
+            print('WARNING: Found conflicting asm under the '
+                                 'same prefix: %r!' % (prefix,), file=sys.stderr)
+          else:
+            func_dict[prefix][func] = None  # conflicting bodies: blank this prefix's entry
+            continue
+
+      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)
+
+##### Generator of LLVM IR CHECK lines
+
+SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
+
+# Match things that look like identifiers, but only if they are followed by
+# spaces, commas, parens, or the end of the string.
+IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
+
+# Create a FileCheck variable name based on an IR name.
+def get_value_name(var):
+  # All-digit names get a 'TMP' prefix; '.' and '-' turn into '_'.
+  stem = 'TMP' + var if var.isdigit() else var
+  cleaned = stem.replace('.', '_').replace('-', '_')
+  # The resulting variable name is upper-cased.
+  return cleaned.upper()
+
+
+# Build a FileCheck variable definition whose pattern is '%.*'.
+def get_value_definition(var):
+  return '[[%s:%%.*]]' % get_value_name(var)
+
+
+# Build a reference to a FileCheck variable.
+def get_value_use(var):
+  return '[[{}]]'.format(get_value_name(var))
+
+# Replace IR value defs and uses with FileCheck variables.
+def genericize_check_lines(lines, is_analyze):
+  """Rewrite lines in place (and return it) for use as check lines.
+
+  Every line has '%.' replaced by '%dot' and its IR comment removed. When
+  is_analyze is False, IR values are also replaced with FileCheck variables:
+  the first occurrence of a name becomes a definition, later ones a use.
+  """
+  seen_names = set()
+
+  def to_filecheck_var(match):
+    # Called by re.sub for each IR_VALUE_RE match; group(2) is the IR name.
+    name = match.group(2)
+    if name not in seen_names:
+      seen_names.add(name)
+      replacement = get_value_definition(name)
+    else:
+      replacement = get_value_use(name)
+    # re.sub swaps out the whole match, so the leading whitespace and the
+    # trailing delimiter (groups 1 and 3) must be handed back as well.
+    return match.group(1) + replacement + match.group(3)
+
+  for index, raw_line in enumerate(lines):
+    # An IR variable named '%.' matches the FileCheck regex string.
+    dotless = raw_line.replace('%.', '%dot')
+    # Ignore any comments, since the check lines will too.
+    stripped = SCRUB_IR_COMMENT_RE.sub(r'', dotless)
+    if is_analyze == False:
+      stripped = IR_VALUE_RE.sub(to_filecheck_var, stripped)
+    lines[index] = stripped
+  return lines
+
+
+def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
+  """Append the check lines for func_name to output_lines.
+
+  The prefixes in each prefix_list entry's first element are tried in order:
+  one that was already printed ends that entry, one without a body for
+  func_name in func_dict is skipped, and the first remaining one is emitted.
+  """
+  printed_prefixes = []
+  for p in prefix_list:
+    checkprefixes = p[0]
+    for checkprefix in checkprefixes:
+      if checkprefix in printed_prefixes:
+        break
+      # TODO func_dict[checkprefix] may be None, '' or not exist.
+      # Fix the call sites.
+      if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]:
+        continue
+
+      # Add some space between different check prefixes, but not after the last
+      # check line (before the test code).
+      if is_asm == True:
+        if len(printed_prefixes) != 0:
+          output_lines.append(comment_marker)
+
+      printed_prefixes.append(checkprefix)
+      output_lines.append(check_label_format % (checkprefix, func_name))
+      func_body = str(func_dict[checkprefix][func_name]).splitlines()
+
+      # For ASM output, just emit the check lines.
+      if is_asm == True:
+        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
+        for func_line in func_body[1:]:
+          output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
+        break
+
+      # For IR output, change all defs to FileCheck variables, so we're immune
+      # to variable naming fashions.
+      func_body = genericize_check_lines(func_body, is_analyze)
+
+      # Disabled for now (better to check everything; this could be enabled
+      # selectively with an optional invocation argument): special-casing
+      # the first line of the function body, which is often just noise such
+      # as a useless asm comment or an "entry:" label.
+
+      is_blank_line = False
+
+      for func_line in func_body:
+        if func_line.strip() == '':
+          is_blank_line = True
+          continue
+        # Do not waste time checking IR comments.
+        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
+
+        # A line that follows a skipped blank line gets a plain check, not -NEXT.
+        if is_blank_line == True:
+          output_lines.append('{} {}:       {}'.format(
+              comment_marker, checkprefix, func_line))
+        else:
+          output_lines.append('{} {}-NEXT:  {}'.format(
+              comment_marker, checkprefix, func_line))
+        is_blank_line = False
+
+      # Add space between different check prefixes and also before the first
+      # line of code in the test function.
+      output_lines.append(comment_marker)
+      break
+
+def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
+  # IR labels have the form "<marker> <PREFIX>-LABEL: @<function>(".
+  ir_label = '%s %%s-LABEL: @%%s(' % (comment_marker,)
+  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, ir_label, is_asm=False, is_analyze=False)
+
+def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
+  analysis_label = "%s %%s-LABEL: '%%s'" % (comment_marker,)  # function name is single-quoted
+  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, analysis_label, is_asm=False, is_analyze=True)
diff --git a/utils/bugpoint_gisel_reducer.py b/utils/bugpoint_gisel_reducer.py
new file mode 100755
index 000000000000..4c366efbba49
--- /dev/null
+++ b/utils/bugpoint_gisel_reducer.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+"""Reduces GlobalISel failures.
+
+This script is a utility to reduce tests that GlobalISel
+fails to compile.
+
+It runs llc to get the error message using a regex and creates
+a custom command to check that specific error. Then, it runs bugpoint
+with the custom command.
+
+"""
+from __future__ import print_function
+import argparse
+import re
+import subprocess
+import sys
+import tempfile
+import os
+
+
+def log(msg):  # Informational output goes to stdout.
+    print(msg, file=sys.stdout)
+
+
+def hr():  # Log a 50-character horizontal rule.
+    log(50 * '-')
+
+
+def log_err(msg):  # Errors go to stderr with an 'ERROR: ' prefix.
+    print('ERROR: {}'.format(msg), end='\n', file=sys.stderr)
+
+
+def check_path(path):  # Return `path` unchanged if it exists; otherwise report and exit.
+    if not os.path.exists(path):
+        log_err('{} does not exist.'.format(path))
+        sys.exit(1)  # a bare `raise` has no active exception to re-raise here
+    return path
+
+
+def check_bin(build_dir, bin_name):
+    # Tools are looked up under <build_dir>/bin and must exist.
+    return check_path('{}/bin/{}'.format(build_dir, bin_name))
+
+
+def run_llc(llc, irfile):  # Returns [reason, opcode] for a recognised llc failure, else 0.
+    pr = subprocess.Popen([llc,
+                           '-o',
+                           '-',
+                           '-global-isel',
+                           '-pass-remarks-missed=gisel',
+                           irfile],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE,
+                          universal_newlines=True)  # text stderr, so the str regex works on Python 3
+    _, err = pr.communicate()  # communicate() already waits for llc to exit
+    if pr.returncode == 0:
+        return 0
+    re_err = re.compile(
+        r'LLVM ERROR: ([a-z\s]+):.*(G_INTRINSIC[_A-Z]* <intrinsic:@[a-zA-Z0-9\.]+>|G_[A-Z_]+)')
+    match = re_err.match(err)
+    if not match:
+        return 0
+    else:
+        return [match.group(1), match.group(2)]
+
+
+def run_bugpoint(bugpoint_bin, llc_bin, opt_bin, tmp, ir_file):
+    # The compile command re-runs this script with '-c', llc and the error file.
+    compileCmd = '-compile-command={} -c {} {}'.format(
+        os.path.realpath(__file__), llc_bin, tmp)
+    pr = subprocess.Popen([bugpoint_bin,
+                           '-compile-custom',
+                           compileCmd,
+                           '-opt-command={}'.format(opt_bin),
+                           ir_file])
+    if pr.wait() != 0:
+        log_err("Unable to reduce the test.")
+        sys.exit(1)  # a bare `raise` has no active exception to re-raise here
+
+
+def run_bugpoint_check():  # '-c' mode: returns 1 if llc fails with the recorded error, else 0.
+    path_to_llc = sys.argv[2]
+    path_to_err = sys.argv[3]
+    path_to_ir = sys.argv[4]
+    with open(path_to_err, 'r') as f:
+        expected = f.read().split(';')  # file holds 'reason;opcode'
+    res = run_llc(path_to_llc, path_to_ir)
+    if res == 0:
+        return 0
+    log('GlobalISel failed, {}: {}'.format(res[0], res[1]))
+    # Only the exact recorded failure counts as a reproduction.
+    if res != expected:
+        return 0
+    return 1
+
+
+def main():  # Entry point: bugpoint callback when called with '-c', otherwise the reducer driver.
+    # Check if this is called by bugpoint.
+    if len(sys.argv) == 5 and sys.argv[1] == '-c':
+        sys.exit(run_bugpoint_check())
+
+    # Parse arguments.
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument('BuildDir', help="Path to LLVM build directory")
+    parser.add_argument('IRFile', help="Path to the input IR file")
+    args = parser.parse_args()
+
+    # Check if the binaries exist.
+    build_dir = check_path(args.BuildDir)
+    ir_file = check_path(args.IRFile)
+    llc_bin = check_bin(build_dir, 'llc')
+    opt_bin = check_bin(build_dir, 'opt')
+    bugpoint_bin = check_bin(build_dir, 'bugpoint')
+
+    # Run llc to see if GlobalISel fails.
+    log('Running llc...')
+    res = run_llc(llc_bin, ir_file)
+    if res == 0:
+        log_err("Expected failure")
+        sys.exit(1)  # nothing to reduce; a bare `raise` has no active exception here
+    hr()
+    log('GlobalISel failed, {}: {}.'.format(res[0], res[1]))
+    tmp = tempfile.NamedTemporaryFile(mode='w')  # text mode: the payload written below is str
+    log('Writing error to {} for bugpoint.'.format(tmp.name))
+    tmp.write(';'.join(res))
+    tmp.flush()  # the file is read back by name while it is still open
+    hr()
+
+    # Run bugpoint.
+    log('Running bugpoint...')
+    run_bugpoint(bugpoint_bin, llc_bin, opt_bin, tmp.name, ir_file)
+    hr()
+    log('Done!')
+    hr()
+    output_file = 'bugpoint-reduced-simplified.bc'
+    log('Run llvm-dis to disassemble the output:')
+    log('$ {}/bin/llvm-dis -o - {}'.format(build_dir, output_file))
+    log('Run llc to reproduce the problem:')
+    log('$ {}/bin/llc -o - -global-isel '
+        '-pass-remarks-missed=gisel {}'.format(build_dir, output_file))
+
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+    main()
diff --git a/utils/docker/build_docker_image.sh b/utils/docker/build_docker_image.sh
index 9b0ba46fe4b3..820a5f75ad13 100755
--- a/utils/docker/build_docker_image.sh
+++ b/utils/docker/build_docker_image.sh
@@ -13,6 +13,8 @@ IMAGE_SOURCE=""
 DOCKER_REPOSITORY=""
 DOCKER_TAG=""
 BUILDSCRIPT_ARGS=""
+CHECKOUT_ARGS=""
+CMAKE_ENABLED_PROJECTS=""
 
 function show_usage() {
   cat << EOF
@@ -25,7 +27,7 @@ Available options:
     -s|--source             image source dir (i.e. debian8, nvidia-cuda, etc)
     -d|--docker-repository  docker repository for the image
     -t|--docker-tag         docker tag for the image
-  LLVM-specific:
+  Checkout arguments:
     -b|--branch         svn branch to checkout, i.e. 'trunk',
                         'branches/release_40'
                         (default: 'trunk')
@@ -40,11 +42,12 @@ Available options:
                         Project 'llvm' is always included and ignored, if
                         specified.
                         Can be specified multiple times.
-    -i|--install-target name of a cmake install target to build and include in
-                        the resulting archive. Can be specified multiple times.
     -c|--checksums      name of a file, containing checksums of llvm checkout.
                         Script will fail if checksums of the checkout do not
                         match.
+  Build-specific:
+    -i|--install-target name of a cmake install target to build and include in
+                        the resulting archive. Can be specified multiple times.
 
 Required options: --source and --docker-repository, at least one
   --install-target.
@@ -75,6 +78,7 @@ EOF
 
 CHECKSUMS_FILE=""
 SEEN_INSTALL_TARGET=0
+SEEN_CMAKE_ARGS=0
 while [[ $# -gt 0 ]]; do
   case "$1" in
     -h|--help)
@@ -96,13 +100,28 @@ while [[ $# -gt 0 ]]; do
       DOCKER_TAG="$1"
       shift
       ;;
-    -i|--install-target|-r|--revision|-c|-cherrypick|-b|--branch|-p|--llvm-project)
-      if [ "$1" == "-i" ] || [ "$1" == "--install-target" ]; then
-        SEEN_INSTALL_TARGET=1
-      fi
+    -r|--revision|-c|--cherrypick|-b|--branch)  # NOTE(review): '-c' here shadows the later '-c|--checksums' arm
+      CHECKOUT_ARGS="$CHECKOUT_ARGS $1 $2"  # passed on through the checkout_args build-arg
+      shift 2
+      ;;
+    -i|--install-target)
+      SEEN_INSTALL_TARGET=1
       BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS $1 $2"
       shift 2
       ;;
+    -p|--llvm-project)
+      PROJ="$2"
+      if [ "$PROJ" == "cfe" ]; then  # 'cfe' is accepted as an alias for 'clang'
+        PROJ="clang"
+      fi
+
+      CHECKOUT_ARGS="$CHECKOUT_ARGS $1 $PROJ"
+      if [ "$PROJ" != "clang-tools-extra" ]; then  # checked out, but kept out of LLVM_ENABLE_PROJECTS
+        CMAKE_ENABLED_PROJECTS="$CMAKE_ENABLED_PROJECTS;$PROJ"
+      fi
+
+      shift 2
+      ;;
     -c|--checksums)
       shift
       CHECKSUMS_FILE="$1"
@@ -111,6 +130,7 @@ while [[ $# -gt 0 ]]; do
     --)
       shift
       BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -- $*"
+      SEEN_CMAKE_ARGS=1
       shift $#
       ;;
     *)
@@ -120,6 +140,17 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
+
+if [ -n "$CMAKE_ENABLED_PROJECTS" ]; then
+  # Each project was appended as ';name', so strip the leading separator.
+  CMAKE_ENABLED_PROJECTS="${CMAKE_ENABLED_PROJECTS#;}"
+
+  if [ "$SEEN_CMAKE_ARGS" -eq 0 ]; then
+    BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS --"  # cmake arguments must follow a '--'
+  fi
+  BUILDSCRIPT_ARGS="$BUILDSCRIPT_ARGS -DLLVM_ENABLE_PROJECTS=$CMAKE_ENABLED_PROJECTS"
+fi
+
 command -v docker >/dev/null ||
   {
     echo "Docker binary cannot be found. Please install Docker to use this script."
@@ -163,19 +194,10 @@ if [ "$DOCKER_TAG" != "" ]; then
   DOCKER_TAG=":$DOCKER_TAG"
 fi
 
-echo "Building from $IMAGE_SOURCE"
-echo "Building $DOCKER_REPOSITORY-build$DOCKER_TAG"
-docker build -t "$DOCKER_REPOSITORY-build$DOCKER_TAG" \
+echo "Building ${DOCKER_REPOSITORY}${DOCKER_TAG} from $IMAGE_SOURCE"
+docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \
+  --build-arg "checkout_args=$CHECKOUT_ARGS" \
   --build-arg "buildscript_args=$BUILDSCRIPT_ARGS" \
-  -f "$BUILD_DIR/$IMAGE_SOURCE/build/Dockerfile" \
+  -f "$BUILD_DIR/$IMAGE_SOURCE/Dockerfile" \
   "$BUILD_DIR"
-
-echo "Copying clang installation to release image sources"
-docker run -v "$BUILD_DIR/$IMAGE_SOURCE:/workspace" "$DOCKER_REPOSITORY-build$DOCKER_TAG" \
-  cp /tmp/clang.tar.gz /workspace/release
-
-echo "Building release image"
-docker build -t "${DOCKER_REPOSITORY}${DOCKER_TAG}" \
-  "$BUILD_DIR/$IMAGE_SOURCE/release"
-
 echo "Done"
diff --git a/utils/docker/debian8/build/Dockerfile b/utils/docker/debian8/Dockerfile
index 5c5ed6744963..fe12ec55e52a 100644
--- a/utils/docker/debian8/build/Dockerfile
+++ b/utils/docker/debian8/Dockerfile
@@ -6,25 +6,27 @@
 # License. See LICENSE.TXT for details.
 #
 #===----------------------------------------------------------------------===//
-# Produces an image that compiles and archives clang, based on debian8.
-FROM launcher.gcr.io/google/debian8:latest
-
+# Stage 1. Check out LLVM source code and run the build.
+FROM launcher.gcr.io/google/debian8:latest as builder
 LABEL maintainer "LLVM Developers"
-
 # Install build dependencies of llvm.
 # First, Update the apt's source list and include the sources of the packages.
 RUN grep deb /etc/apt/sources.list | \
     sed 's/^deb/deb-src /g' >> /etc/apt/sources.list
-
 # Install compiler, python and subversion.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ca-certificates gnupg \
-           build-essential python wget subversion ninja-build && \
+           build-essential python wget subversion unzip && \
     rm -rf /var/lib/apt/lists/*
-
+# Install a newer ninja release. It seems the older version in the debian repos
+# randomly crashes when compiling llvm.
+RUN wget "https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip" && \
+    echo "d2fea9ff33b3ef353161ed906f260d565ca55b8ca0568fa07b1d2cab90a84a07 ninja-linux.zip" \
+        | sha256sum -c  && \
+    unzip ninja-linux.zip -d /usr/local/bin && \
+    rm ninja-linux.zip
 # Import public key required for verifying signature of cmake download.
 RUN gpg --keyserver hkp://pgp.mit.edu --recv 0x2D2CEF1034921684
-
 # Download, verify and install cmake version that can compile clang into /usr/local.
 # (Version in debian8 repos is is too old)
 RUN mkdir /tmp/cmake-install && cd /tmp/cmake-install && \
@@ -40,8 +42,20 @@ RUN mkdir /tmp/cmake-install && cd /tmp/cmake-install && \
 ADD checksums /tmp/checksums
 ADD scripts /tmp/scripts
 
-# Arguments passed to build_install_clang.sh.
+# Checkout the source code.
+ARG checkout_args
+RUN /tmp/scripts/checkout.sh ${checkout_args}
+# Run the build. Results of the build will be available at /tmp/clang-install/.
 ARG buildscript_args
+RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_args}
 
-# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
+
+# Stage 2. Produce a minimal release image with build results.
+FROM launcher.gcr.io/google/debian8:latest
+LABEL maintainer "LLVM Developers"
+# Install packages for minimal useful image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends libstdc++-4.9-dev binutils && \
+    rm -rf /var/lib/apt/lists/*
+# Copy build results of stage 1 to /usr/local.
+COPY --from=builder /tmp/clang-install/ /usr/local/
diff --git a/utils/docker/debian8/release/Dockerfile b/utils/docker/debian8/release/Dockerfile
deleted file mode 100644
index 3a44a7d41166..000000000000
--- a/utils/docker/debian8/release/Dockerfile
+++ /dev/null
@@ -1,21 +0,0 @@
-#===- llvm/utils/docker/debian8/release/Dockerfile -----------------------===//
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===//
-# A release image, containing clang installation, produced by the 'build/' image
-# and adding libstdc++ and binutils.
-FROM launcher.gcr.io/google/debian8:latest
-
-LABEL maintainer "LLVM Developers"
-
-# Install packages for minimal useful image.
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends libstdc++-4.9-dev binutils && \
-    rm -rf /var/lib/apt/lists/*
-
-# Unpack clang installation into this image.
-ADD clang.tar.gz /usr/local/
diff --git a/utils/docker/example/Dockerfile b/utils/docker/example/Dockerfile
new file mode 100644
index 000000000000..d875ed96d907
--- /dev/null
+++ b/utils/docker/example/Dockerfile
@@ -0,0 +1,40 @@
+#===- llvm/utils/docker/example/Dockerfile --------------------------------===//
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# This is an example Dockerfile to build an image that compiles clang.
+# Replace FIXMEs to prepare your own image.
+
+# Stage 1. Check out LLVM source code and run the build.
+# FIXME: Replace 'ubuntu' with your base image
+FROM ubuntu as builder
+# FIXME: Change maintainer name
+LABEL maintainer "Maintainer <maintainer@email>"
+# FIXME: Install llvm/clang build dependencies here. Including compiler to
+# build stage1, cmake, subversion, ninja, etc.
+
+ADD checksums /tmp/checksums
+ADD scripts /tmp/scripts
+
+# Checkout the source code.
+ARG checkout_args
+RUN /tmp/scripts/checkout.sh ${checkout_args}
+# Run the build. Results of the build will be available at /tmp/clang-install/.
+ARG buildscript_args
+RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_args}
+
+
+# Stage 2. Produce a minimal release image with build results.
+# FIXME: Replace 'ubuntu' with your base image.
+FROM ubuntu
+# FIXME: Change maintainer name.
+LABEL maintainer "Maintainer <maintainer@email>"
+# FIXME: Install all packages you want to have in your release container.
+# A minimal useful installation should include at least libstdc++ and binutils.
+
+# Copy build results of stage 1 to /usr/local.
+COPY --from=builder /tmp/clang-install/ /usr/local/
diff --git a/utils/docker/example/build/Dockerfile b/utils/docker/example/build/Dockerfile
deleted file mode 100644
index be077f59f48a..000000000000
--- a/utils/docker/example/build/Dockerfile
+++ /dev/null
@@ -1,28 +0,0 @@
-#===- llvm/utils/docker/example/build/Dockerfile -------------------------===//
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===//
-# This is an example Dockerfile to build an image that compiles clang.
-# Replace FIXMEs to prepare your own image.
-
-# FIXME: Replace 'ubuntu' with your base image
-FROM ubuntu
-
-# FIXME: Change maintainer name
-LABEL maintainer "Maintainer <maintainer@email>"
-
-# FIXME: Install llvm/clang build dependencies. Including compiler to
-# build stage1, cmake, subversion, ninja, etc.
-
-ADD checksums /tmp/checksums
-ADD scripts /tmp/scripts
-
-# Arguments passed to build_install_clang.sh.
-ARG buildscript_args
-
-# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
diff --git a/utils/docker/example/release/Dockerfile b/utils/docker/example/release/Dockerfile
deleted file mode 100644
index b088ad885ac5..000000000000
--- a/utils/docker/example/release/Dockerfile
+++ /dev/null
@@ -1,24 +0,0 @@
-#===- llvm/utils/docker/example/release/Dockerfile -----------------------===//
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===//
-# An image that unpacks a clang installation, compiled by the 'build/'
-# container.
-# Replace FIXMEs to prepare your own image.
-
-# FIXME: Replace 'ubuntu' with your base image.
-FROM ubuntu
-
-# FIXME: Change maintainer name.
-LABEL maintainer "Maintainer <maintainer@email>"
-
-# FIXME: Install all packages you want to have in your release container.
-# A minimal useful installation must include libstdc++ and binutils.
-
-# Unpack clang installation into this container.
-# It is copied to this directory by build_docker_image.sh script.
-ADD clang.tar.gz /usr/local/
diff --git a/utils/docker/nvidia-cuda/Dockerfile b/utils/docker/nvidia-cuda/Dockerfile
new file mode 100644
index 000000000000..32804d77ee9a
--- /dev/null
+++ b/utils/docker/nvidia-cuda/Dockerfile
@@ -0,0 +1,34 @@
+#===- llvm/utils/docker/nvidia-cuda/Dockerfile ----------------------------===//
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===//
+# Stage 1. Check out LLVM source code and run the build.
+FROM nvidia/cuda:8.0-devel as builder
+LABEL maintainer "LLVM Developers"
+# Install llvm build dependencies.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ca-certificates cmake python \
+        subversion ninja-build && \
+    rm -rf /var/lib/apt/lists/*
+
+ADD checksums /tmp/checksums
+ADD scripts /tmp/scripts
+
+# Checkout the source code.
+ARG checkout_args
+RUN /tmp/scripts/checkout.sh ${checkout_args}
+# Run the build. Results of the build will be available at /tmp/clang-install/.
+ARG buildscript_args
+RUN /tmp/scripts/build_install_llvm.sh --to /tmp/clang-install ${buildscript_args}
+
+
+# Stage 2. Produce a minimal release image with build results.
+FROM nvidia/cuda:8.0-devel
+LABEL maintainer "LLVM Developers"
+# Copy clang installation into this container.
+COPY --from=builder /tmp/clang-install/ /usr/local/
+# C++ standard library and binutils are already included in the base package.
diff --git a/utils/docker/nvidia-cuda/build/Dockerfile b/utils/docker/nvidia-cuda/build/Dockerfile
deleted file mode 100644
index cd353a2578bd..000000000000
--- a/utils/docker/nvidia-cuda/build/Dockerfile
+++ /dev/null
@@ -1,31 +0,0 @@
-#===- llvm/utils/docker/nvidia-cuda/build/Dockerfile ---------------------===//
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===//
-# Produces an image that compiles and archives clang, based on nvidia/cuda
-# image.
-FROM nvidia/cuda:8.0-devel
-
-LABEL maintainer "LLVM Developers"
-
-# Arguments to pass to build_install_clang.sh.
-ARG buildscript_args
-
-# Install llvm build dependencies.
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends ca-certificates cmake python \
-		    subversion ninja-build && \
-    rm -rf /var/lib/apt/lists/*
-
-ADD checksums /tmp/checksums
-ADD scripts /tmp/scripts
-
-# Arguments passed to build_install_clang.sh.
-ARG buildscript_args
-
-# Run the build. Results of the build will be available as /tmp/clang.tar.gz.
-RUN /tmp/scripts/build_install_llvm.sh ${buildscript_args}
diff --git a/utils/docker/nvidia-cuda/release/Dockerfile b/utils/docker/nvidia-cuda/release/Dockerfile
deleted file mode 100644
index a30d7d7e91ee..000000000000
--- a/utils/docker/nvidia-cuda/release/Dockerfile
+++ /dev/null
@@ -1,23 +0,0 @@
-#===- llvm/utils/docker/nvidia-cuda/release/Dockerfile -------------------===//
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===//
-# This is an example Dockerfile that copies a clang installation, compiled
-# by the 'build/' container into a fresh docker image to get a container of
-# minimal size.
-# Replace FIXMEs to prepare a new Dockerfile.
-
-# FIXME: Replace 'ubuntu' with your base image.
-FROM nvidia/cuda:8.0-devel
-
-# FIXME: Change maintainer name.
-LABEL maintainer "LLVM Developers"
-
-# Unpack clang installation into this container.
-ADD clang.tar.gz /usr/local/
-
-# C++ standard library and binutils are already included in the base package.
diff --git a/utils/docker/scripts/build_install_llvm.sh b/utils/docker/scripts/build_install_llvm.sh
index 5141fdc9bb5e..0b9c08372039 100755
--- a/utils/docker/scripts/build_install_llvm.sh
+++ b/utils/docker/scripts/build_install_llvm.sh
@@ -14,112 +14,35 @@ function show_usage() {
   cat << EOF
 Usage: build_install_llvm.sh [options] -- [cmake-args]
 
-Checkout svn sources and run cmake with the specified arguments. Used
-inside docker container.
-Passes additional -DCMAKE_INSTALL_PREFIX and archives the contents of
-the directory to /tmp/clang.tar.gz.
+Run cmake with the specified arguments. Used inside docker container.
+Passes additional -DCMAKE_INSTALL_PREFIX and puts the build results into
+the directory specified by --to option.
 
 Available options:
   -h|--help           show this help message
-  -b|--branch         svn branch to checkout, i.e. 'trunk',
-                      'branches/release_40'
-                      (default: 'trunk')
-  -r|--revision       svn revision to checkout
-  -c|--cherrypick     revision to cherry-pick. Can be specified multiple times.
-                      Cherry-picks are performed in the sorted order using the
-                      following command:
-                      'svn patch <(svn diff -c \$rev)'.
-  -p|--llvm-project   name of an svn project to checkout. Will also add the
-                      project to a list LLVM_ENABLE_PROJECTS, passed to CMake.
-                      For clang, please use 'clang', not 'cfe'.
-                      Project 'llvm' is always included and ignored, if
-                      specified.
-                      Can be specified multiple times.
   -i|--install-target name of a cmake install target to build and include in
                       the resulting archive. Can be specified multiple times.
-Required options: At least one --install-target.
+  --to                destination directory where to install the targets.
+Required options: --to, at least one --install-target.
 
 All options after '--' are passed to CMake invocation.
 EOF
 }
 
-LLVM_SVN_REV=""
-CHERRYPICKS=""
-LLVM_BRANCH=""
 CMAKE_ARGS=""
 CMAKE_INSTALL_TARGETS=""
-# We always checkout llvm
-LLVM_PROJECTS="llvm"
-CMAKE_LLVM_ENABLE_PROJECTS=""
-CLANG_TOOLS_EXTRA_ENABLED=0
-
-function contains_project() {
-  local TARGET_PROJ="$1"
-  local PROJ
-  for PROJ in $LLVM_PROJECTS; do
-    if [ "$PROJ" == "$TARGET_PROJ" ]; then
-      return 0
-    fi
-  done
-  return 1
-}
-
-function append_project() {
-  local PROJ="$1"
-
-  LLVM_PROJECTS="$LLVM_PROJECTS $PROJ"
-  if [ "$CMAKE_LLVM_ENABLE_PROJECTS" != "" ]; then
-    CMAKE_LLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS;$PROJ"
-  else
-    CMAKE_LLVM_ENABLE_PROJECTS="$PROJ"
-  fi
-}
+CLANG_INSTALL_DIR=""
 
 while [[ $# -gt 0 ]]; do
   case "$1" in
-    -r|--revision)
-      shift
-      LLVM_SVN_REV="$1"
-      shift
-      ;;
-    -c|--cherrypick)
-      shift
-      CHERRYPICKS="$CHERRYPICKS $1"
-      shift
-      ;;
-    -b|--branch)
+    -i|--install-target)
       shift
-      LLVM_BRANCH="$1"
+      CMAKE_INSTALL_TARGETS="$CMAKE_INSTALL_TARGETS $1"
       shift
       ;;
-    -p|--llvm-project)
+    --to)
       shift
-      PROJ="$1"
-      shift
-
-      if [ "$PROJ" == "cfe" ]; then
-        PROJ="clang"
-      fi
-
-      if [ "$PROJ" == "clang-tools-extra" ]; then
-        if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then
-          echo "Project 'clang-tools-extra' is already enabled, ignoring extra occurences."
-        else
-          CLANG_TOOLS_EXTRA_ENABLED=1
-        fi
-
-        continue
-      fi
-
-      if ! contains_project "$PROJ" ; then
-        append_project "$PROJ"
-      else
-        echo "Project '$PROJ' is already enabled, ignoring extra occurences."
-      fi
-      ;;
-    -i|--install-target)
-      shift
-      CMAKE_INSTALL_TARGETS="$CMAKE_INSTALL_TARGETS $1"
+      CLANG_INSTALL_DIR="$1"
       shift
       ;;
     --)
@@ -142,114 +65,29 @@ if [ "$CMAKE_INSTALL_TARGETS" == "" ]; then
   exit 1
 fi
 
-if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then
-  if ! contains_project "clang"; then
-    echo "Project 'clang-tools-extra' was enabled without 'clang'."
-    echo "Adding 'clang' to a list of projects."
-
-    append_project "clang"
-  fi
-fi
-
-if [ "$LLVM_BRANCH" == "" ]; then
-  LLVM_BRANCH="trunk"
-fi
-
-if [ "$LLVM_SVN_REV" != "" ]; then
-  SVN_REV_ARG="-r$LLVM_SVN_REV"
-  echo "Checking out svn revision r$LLVM_SVN_REV."
-else
-  SVN_REV_ARG=""
-  echo "Checking out latest svn revision."
+if [ "$CLANG_INSTALL_DIR" == "" ]; then
+  echo "No install directory. Please specify the --to argument."
+  exit 1
 fi
 
-# Sort cherrypicks and remove duplicates.
-CHERRYPICKS="$(echo "$CHERRYPICKS" | xargs -n1 | sort | uniq | xargs)"
-
-function apply_cherrypicks() {
-  local CHECKOUT_DIR="$1"
-
-  [ "$CHERRYPICKS" == "" ] || echo "Applying cherrypicks"
-  pushd "$CHECKOUT_DIR"
-
-  # This function is always called on a sorted list of cherrypicks.
-  for CHERRY_REV in $CHERRYPICKS; do
-    echo "Cherry-picking r$CHERRY_REV into $CHECKOUT_DIR"
-
-    local PATCH_FILE="$(mktemp)"
-    svn diff -c $CHERRY_REV > "$PATCH_FILE"
-    svn patch "$PATCH_FILE"
-    rm "$PATCH_FILE"
-  done
-
-  popd
-}
-
 CLANG_BUILD_DIR=/tmp/clang-build
-CLANG_INSTALL_DIR=/tmp/clang-install
-
-mkdir "$CLANG_BUILD_DIR"
-
-# Get the sources from svn.
-echo "Checking out sources from svn"
-mkdir "$CLANG_BUILD_DIR/src"
-for LLVM_PROJECT in $LLVM_PROJECTS; do
-  if [ "$LLVM_PROJECT" == "clang" ]; then
-    SVN_PROJECT="cfe"
-  else
-    SVN_PROJECT="$LLVM_PROJECT"
-  fi
-
-  echo "Checking out https://llvm.org/svn/llvm-project/$SVN_PROJECT to $CLANG_BUILD_DIR/src/$LLVM_PROJECT"
-  svn co -q $SVN_REV_ARG \
-    "https://llvm.org/svn/llvm-project/$SVN_PROJECT/$LLVM_BRANCH" \
-    "$CLANG_BUILD_DIR/src/$LLVM_PROJECT"
-
-  # We apply cherrypicks to all repositories regardless of whether the revision
-  # changes this repository or not. For repositories not affected by the
-  # cherrypick, applying the cherrypick is a no-op.
-  apply_cherrypicks "$CLANG_BUILD_DIR/src/$LLVM_PROJECT"
-done
 
-if [ $CLANG_TOOLS_EXTRA_ENABLED -ne 0 ]; then
-  echo "Checking out https://llvm.org/svn/llvm-project/clang-tools-extra to $CLANG_BUILD_DIR/src/clang/tools/extra"
-  svn co -q $SVN_REV_ARG \
-    "https://llvm.org/svn/llvm-project/clang-tools-extra/$LLVM_BRANCH" \
-    "$CLANG_BUILD_DIR/src/clang/tools/extra"
-
-  apply_cherrypicks "$CLANG_BUILD_DIR/src/clang/tools/extra"
-fi
+mkdir -p "$CLANG_INSTALL_DIR"
 
-CHECKSUMS_FILE="/tmp/checksums/checksums.txt"
-
-if [ -f "$CHECKSUMS_FILE" ]; then
-  echo "Validating checksums for LLVM checkout..."
-  python "$(dirname $0)/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \
-    --partial --multi_dir "$CLANG_BUILD_DIR/src"
-else
-  echo "Skipping checksumming checks..."
-fi
-
-mkdir "$CLANG_BUILD_DIR/build"
+mkdir -p "$CLANG_BUILD_DIR/build"
 pushd "$CLANG_BUILD_DIR/build"
 
 # Run the build as specified in the build arguments.
 echo "Running build"
 cmake -GNinja \
   -DCMAKE_INSTALL_PREFIX="$CLANG_INSTALL_DIR" \
-  -DLLVM_ENABLE_PROJECTS="$CMAKE_LLVM_ENABLE_PROJECTS" \
   $CMAKE_ARGS \
   "$CLANG_BUILD_DIR/src/llvm"
 ninja $CMAKE_INSTALL_TARGETS
 
 popd
 
-# Pack the installed clang into an archive.
-echo "Archiving clang installation to /tmp/clang.tar.gz"
-cd "$CLANG_INSTALL_DIR"
-tar -czf /tmp/clang.tar.gz *
-
 # Cleanup.
-rm -rf "$CLANG_BUILD_DIR" "$CLANG_INSTALL_DIR"
+rm -rf "$CLANG_BUILD_DIR/build"
 
 echo "Done"
diff --git a/utils/docker/scripts/checkout.sh b/utils/docker/scripts/checkout.sh
new file mode 100755
index 000000000000..1bd034d18440
--- /dev/null
+++ b/utils/docker/scripts/checkout.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+#===- llvm/utils/docker/scripts/checkout.sh ---------------------===//
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===-----------------------------------------------------------------------===//
+
+set -e
+
+function show_usage() {  # Print the command-line help text to stdout.
+  cat << EOF
+Usage: checkout.sh [options]
+
+Checkout svn sources into /tmp/clang-build/src. Used inside a docker container.
+
+Available options:
+  -h|--help           show this help message
+  -b|--branch         svn branch to checkout, i.e. 'trunk',
+                      'branches/release_40'
+                      (default: 'trunk')
+  -r|--revision       svn revision to checkout
+  -c|--cherrypick     revision to cherry-pick. Can be specified multiple times.
+                      Cherry-picks are performed in the sorted order using the
+                      following command:
+                      'svn patch <(svn diff -c \$rev)'.
+  -p|--llvm-project   name of an svn project to checkout.
+                      For clang, please use 'clang', not 'cfe'.
+                      Project 'llvm' is always included and ignored, if
+                      specified.
+                      Can be specified multiple times.
+EOF
+}
+
+LLVM_SVN_REV=""  # set by -r|--revision; empty means the latest revision
+CHERRYPICKS=""  # space-separated revisions collected from -c|--cherrypick
+LLVM_BRANCH=""  # set by -b|--branch; empty falls back to 'trunk'
+# We always checkout llvm
+LLVM_PROJECTS="llvm"
+
+function contains_project() {  # Succeeds iff $1 is already listed in LLVM_PROJECTS.
+  local WANTED="$1"
+  local CANDIDATE
+  for CANDIDATE in $LLVM_PROJECTS; do
+    if [[ "$CANDIDATE" == "$WANTED" ]]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+while [[ $# -gt 0 ]]; do  # Parse command-line options.
+  case "$1" in
+    -r|--revision)
+      shift
+      LLVM_SVN_REV="$1"
+      shift
+      ;;
+    -c|--cherrypick)
+      shift
+      CHERRYPICKS="$CHERRYPICKS $1"
+      shift
+      ;;
+    -b|--branch)
+      shift
+      LLVM_BRANCH="$1"
+      shift
+      ;;
+    -p|--llvm-project)
+      shift
+      PROJ="$1"
+      shift
+      # 'cfe' is accepted as an alias for 'clang'.
+      if [ "$PROJ" == "cfe" ]; then
+        PROJ="clang"
+      fi
+      # clang-tools-extra is checked out inside the clang tree, so clang must precede it.
+      if ! contains_project "$PROJ" ; then
+        if [ "$PROJ" == "clang-tools-extra" ] && ! contains_project "clang"; then
+          echo "Project 'clang-tools-extra' specified before 'clang'. Adding 'clang' to a list of projects first."
+          LLVM_PROJECTS="$LLVM_PROJECTS clang"
+        fi
+        LLVM_PROJECTS="$LLVM_PROJECTS $PROJ"
+      else
+        echo "Project '$PROJ' is already enabled, ignoring extra occurrences."
+      fi
+      ;;
+    -h|--help)
+      show_usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+  esac
+done
+
+# Default to trunk when no branch was requested on the command line.
+: "${LLVM_BRANCH:=trunk}"
+
+# Turn the optional revision into an argument for 'svn co'.
+SVN_REV_ARG=""
+if [ -n "$LLVM_SVN_REV" ]; then
+  SVN_REV_ARG="-r$LLVM_SVN_REV"
+  echo "Checking out svn revision r$LLVM_SVN_REV."
+else
+  echo "Checking out latest svn revision."
+fi
+
+# Sort cherrypicks numerically (so r99999 precedes r100000) and remove duplicates.
+CHERRYPICKS="$(echo "$CHERRYPICKS" | xargs -n1 | sort -n | uniq | xargs)"
+
+function apply_cherrypicks() {  # Apply every revision in CHERRYPICKS to the checkout at $1.
+  local CHECKOUT_DIR="$1"
+  local CHERRY_REV PATCH_FILE
+
+  [ "$CHERRYPICKS" == "" ] || echo "Applying cherrypicks"
+  pushd "$CHECKOUT_DIR"
+
+  # This function is always called on a sorted list of cherrypicks.
+  for CHERRY_REV in $CHERRYPICKS; do
+    echo "Cherry-picking r$CHERRY_REV into $CHECKOUT_DIR"
+    # Assigned separately from 'local' so that a mktemp failure trips 'set -e'.
+    PATCH_FILE="$(mktemp)"
+    svn diff -c "$CHERRY_REV" > "$PATCH_FILE"
+    svn patch "$PATCH_FILE"
+    rm "$PATCH_FILE"
+  done
+  popd
+}
+
+CLANG_BUILD_DIR=/tmp/clang-build
+
+# Get the sources from svn.
+echo "Checking out sources from svn"
+mkdir -p "$CLANG_BUILD_DIR/src"
+for LLVM_PROJECT in $LLVM_PROJECTS; do
+  # Defaults: the svn project has the same name and is checked out under src/.
+  SVN_PROJECT="$LLVM_PROJECT"
+  CHECKOUT_DIR="$CLANG_BUILD_DIR/src/$LLVM_PROJECT"
+  case "$LLVM_PROJECT" in
+    clang)
+      SVN_PROJECT="cfe"  # clang is fetched from the 'cfe' svn project
+      ;;
+    clang-tools-extra)
+      CHECKOUT_DIR="$CLANG_BUILD_DIR/src/clang/tools/extra"  # nested in the clang tree
+      ;;
+  esac
+
+  echo "Checking out https://llvm.org/svn/llvm-project/$SVN_PROJECT to $CHECKOUT_DIR"
+  svn co -q $SVN_REV_ARG \
+    "https://llvm.org/svn/llvm-project/$SVN_PROJECT/$LLVM_BRANCH" \
+    "$CHECKOUT_DIR"
+
+  # We apply cherrypicks to all repositories regardless of whether the revision
+  # changes this repository or not. For repositories not affected by the
+  # cherrypick, applying the cherrypick is a no-op.
+  apply_cherrypicks "$CHECKOUT_DIR"
+done
+
+CHECKSUMS_FILE="/tmp/checksums/checksums.txt"
+# Validation is optional: it only runs when the checksums file is present.
+if [ -f "$CHECKSUMS_FILE" ]; then
+  echo "Validating checksums for LLVM checkout..."
+  python "$(dirname "$0")/llvm_checksum/llvm_checksum.py" -c "$CHECKSUMS_FILE" \
+    --partial --multi_dir "$CLANG_BUILD_DIR/src"
+else
+  echo "Skipping checksumming checks..."
+fi
+
+echo "Done"
diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el
index 3a609f249054..95c7f3c0c6d3 100644
--- a/utils/emacs/llvm-mode.el
+++ b/utils/emacs/llvm-mode.el
@@ -1,6 +1,7 @@
 ;;; llvm-mode.el --- Major mode for the LLVM assembler language.
 
 ;; Maintainer:  The LLVM team, http://llvm.org/
+;; Version: 1.0
 
 ;;; Commentary:
 
@@ -19,10 +20,18 @@
 
 (defvar llvm-font-lock-keywords
   (list
+   ;; Attributes
+   `(,(regexp-opt
+       '("alwaysinline" "argmemonly" "builtin" "cold" "convergent" "inaccessiblememonly"
+         "inaccessiblemem_or_argmemonly" "inlinehint" "jumptable" "minsize" "naked" "nobuiltin"
+         "noduplicate" "noimplicitfloat" "noinline" "nonlazybind" "noredzone" "noreturn"
+         "norecurse" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice"
+         "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress"
+         "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly") 'symbols) . font-lock-constant-face)
    ;; Variables
-   '("%[-a-zA-Z$\._][-a-zA-Z$\._0-9]*" . font-lock-variable-name-face)
+   '("%[-a-zA-Z$._][-a-zA-Z$._0-9]*" . font-lock-variable-name-face)
    ;; Labels
-   '("[-a-zA-Z$\._0-9]+:" . font-lock-variable-name-face)
+   '("[-a-zA-Z$._0-9]+:" . font-lock-variable-name-face)
    ;; Unnamed variable slots
    '("%[-]?[0-9]+" . font-lock-variable-name-face)
    ;; Types
@@ -30,15 +39,25 @@
    ;; Integer literals
    '("\\b[-]?[0-9]+\\b" . font-lock-preprocessor-face)
    ;; Floating point constants
-   '("\\b[-+]?[0-9]+\.[0-9]*\([eE][-+]?[0-9]+\)?\\b" . font-lock-preprocessor-face)
+   '("\\b[-+]?[0-9]+\\.[0-9]*\\([eE][-+]?[0-9]+\\)?\\b" . font-lock-preprocessor-face)
    ;; Hex constants
    '("\\b0x[0-9A-Fa-f]+\\b" . font-lock-preprocessor-face)
    ;; Keywords
-   `(,(regexp-opt '("begin" "end" "true" "false" "zeroinitializer" "declare"
-                    "define" "global" "constant" "const" "internal" "linkonce" "linkonce_odr"
-                    "weak" "weak_odr" "appending" "uninitialized" "implementation" "..."
-                    "null" "undef" "to" "except" "not" "target" "endian" "little" "big"
-                    "pointersize" "volatile" "fastcc" "coldcc" "cc" "personality") 'symbols) . font-lock-keyword-face)
+   `(,(regexp-opt
+       '(;; Toplevel entities
+         "declare" "define" "module" "target" "source_filename" "global" "constant" "const"
+         "attributes" "uselistorder" "uselistorder_bb"
+         ;; Linkage types
+         "private" "internal" "weak" "weak_odr" "linkonce" "linkonce_odr" "available_externally" "appending" "common" "extern_weak" "external"
+         "uninitialized" "implementation" "..."
+         ;; Values
+         "true" "false" "null" "undef" "zeroinitializer" "none" "c" "asm" "blockaddress"
+
+         ;; Calling conventions
+         "ccc" "fastcc" "coldcc" "webkit_jscc" "anyregcc" "preserve_mostcc" "preserve_allcc"
+         "cxx_fast_tlscc" "swiftcc"
+
+         "atomic" "volatile" "personality" "prologue" "section") 'symbols) . font-lock-keyword-face)
    ;; Arithmetic and Logical Operators
    `(,(regexp-opt '("add" "sub" "mul" "sdiv" "udiv" "urem" "srem" "and" "or" "xor"
                     "setne" "seteq" "setlt" "setgt" "setle" "setge") 'symbols) . font-lock-keyword-face)
@@ -74,11 +93,11 @@
 \\{llvm-mode-map}
   Runs `llvm-mode-hook' on startup."
   (setq font-lock-defaults `(llvm-font-lock-keywords))
-  (setq comment-start ";"))
+  (setq-local comment-start ";"))
 
 ;; Associate .ll files with llvm-mode
 ;;;###autoload
-(add-to-list 'auto-mode-alist (cons (purecopy "\\.ll\\'")  'llvm-mode))
+(add-to-list 'auto-mode-alist (cons "\\.ll\\'" 'llvm-mode))
 
 (provide 'llvm-mode)
 
diff --git a/utils/extract_symbols.py b/utils/extract_symbols.py
index 96ae24c608e0..93ad2e9c3758 100755
--- a/utils/extract_symbols.py
+++ b/utils/extract_symbols.py
@@ -380,7 +380,7 @@ if __name__ == '__main__':
         print("Couldn't find a program to read symbols with", file=sys.stderr)
         exit(1)
     if not is_32bit_windows:
-        print("Couldn't find a program to determing the target", file=sys.stderr)
+        print("Couldn't find a program to determine the target", file=sys.stderr)
         exit(1)
 
     # How we determine which symbols to keep and which to discard depends on
diff --git a/utils/gdb-scripts/prettyprinters.py b/utils/gdb-scripts/prettyprinters.py
index 1a549f875d44..918411db42fb 100644
--- a/utils/gdb-scripts/prettyprinters.py
+++ b/utils/gdb-scripts/prettyprinters.py
@@ -1,4 +1,19 @@
 import gdb.printing
+
+class Iterator:
+  def __iter__(self):
+    return self
+
+  # Python 2 compatibility: its iterator protocol calls next(), not __next__().
+  def next(self):
+    return self.__next__()
+
+  def children(self):
+    return self
+
+def escape_bytes(val, l):
+  return '"' + val.string(encoding='Latin-1', length=l).encode('unicode_escape').decode() + '"'
+
 class SmallStringPrinter:
   """Print an llvm::SmallString object."""
 
@@ -7,11 +22,7 @@ class SmallStringPrinter:
 
   def to_string(self):
     begin = self.val['BeginX']
-    end = self.val['EndX']
-    return begin.cast(gdb.lookup_type("char").pointer()).string(length = end - begin)
-
-  def display_hint (self):
-    return 'string'
+    return escape_bytes(begin.cast(gdb.lookup_type('char').pointer()), self.val['Size'])
 
 class StringRefPrinter:
   """Print an llvm::StringRef object."""
@@ -20,49 +31,27 @@ class StringRefPrinter:
     self.val = val
 
   def to_string(self):
-    return self.val['Data'].string(length =  self.val['Length'])
-
-  def display_hint (self):
-    return 'string'
+    return escape_bytes(self.val['Data'], self.val['Length'])
 
-class SmallVectorPrinter:
+class SmallVectorPrinter(Iterator):
   """Print an llvm::SmallVector object."""
 
-  class _iterator:
-    def __init__(self, begin, end):
-      self.cur = begin
-      self.end = end
-      self.count = 0
-
-    def __iter__(self):
-      return self
-
-    def next(self):
-      if self.cur == self.end:
-        raise StopIteration
-      count = self.count
-      self.count = self.count + 1
-      cur = self.cur
-      self.cur = self.cur + 1
-      return '[%d]' % count, cur.dereference()
-
-    __next__ = next
-
   def __init__(self, val):
     self.val = val
-
-  def children(self):
-    t = self.val.type.template_argument(0).pointer()
-    begin = self.val['BeginX'].cast(t)
-    end = self.val['EndX'].cast(t)
-    return self._iterator(begin, end)
+    t = val.type.template_argument(0).pointer()
+    self.begin = val['BeginX'].cast(t)
+    self.size = val['Size']
+    self.i = 0
+
+  def __next__(self):
+    if self.i == self.size:
+      raise StopIteration
+    ret = '[{}]'.format(self.i), (self.begin+self.i).dereference()
+    self.i += 1
+    return ret
 
   def to_string(self):
-    t = self.val.type.template_argument(0).pointer()
-    begin = self.val['BeginX'].cast(t)
-    end = self.val['EndX'].cast(t)
-    capacity = self.val['CapacityX'].cast(t)
-    return 'llvm::SmallVector of length %d, capacity %d' % (end - begin, capacity - begin)
+    return 'llvm::SmallVector of Size {}, Capacity {}'.format(self.size, self.val['Capacity'])
 
   def display_hint (self):
     return 'array'
@@ -93,6 +82,8 @@ class ArrayRefPrinter:
   def __init__(self, val):
     self.val = val
 
+    __next__ = next
+
   def children(self):
     data = self.val['Data']
     return self._iterator(data, data + self.val['Length'])
@@ -103,33 +94,44 @@ class ArrayRefPrinter:
   def display_hint (self):
     return 'array'
 
-class OptionalPrinter:
-  """Print an llvm::Optional object."""
+class ExpectedPrinter(Iterator):
+  """Print an llvm::Expected object."""
+
+  def __init__(self, val):
+    self.val = val
 
-  def __init__(self, value):
-    self.value = value
+  def __next__(self):
+    val = self.val
+    if val is None:
+      raise StopIteration
+    self.val = None
+    if val['HasError']:
+      return ('error', val['ErrorStorage'].address.cast(
+          gdb.lookup_type('llvm::ErrorInfoBase').pointer()).dereference())
+    return ('value', val['TStorage'].address.cast(
+        val.type.template_argument(0).pointer()).dereference())
 
-  class _iterator:
-    def __init__(self, member, empty):
-      self.member = member
-      self.done = empty
+  def to_string(self):
+    return 'llvm::Expected{}'.format(' is error' if self.val['HasError'] else '')
 
-    def __iter__(self):
-      return self
+class OptionalPrinter(Iterator):
+  """Print an llvm::Optional object."""
 
-    def next(self):
-      if self.done:
-        raise StopIteration
-      self.done = True
-      return ('value', self.member.dereference())
+  def __init__(self, val):
+    self.val = val
 
-  def children(self):
-    if not self.value['hasVal']:
-      return self._iterator('', True)
-    return self._iterator(self.value['storage']['buffer'].address.cast(self.value.type.template_argument(0).pointer()), False)
+  def __next__(self):
+    val = self.val
+    if val is None:
+      raise StopIteration
+    self.val = None
+    if not val['Storage']['hasVal']:
+      raise StopIteration
+    return ('value', val['Storage']['storage']['buffer'].address.cast(
+        val.type.template_argument(0).pointer()).dereference())
 
   def to_string(self):
-    return 'llvm::Optional is %sinitialized' % ('' if self.value['hasVal'] else 'not ')
+    return 'llvm::Optional{}'.format('' if self.val['Storage']['hasVal'] else ' is not initialized')
 
 class DenseMapPrinter:
   "Print a DenseMap"
@@ -178,6 +180,8 @@ class DenseMapPrinter:
         self.first = False
       return 'x', v
 
+    __next__ = next
+
   def __init__(self, val):
     self.val = val
 
@@ -314,6 +318,7 @@ pp.add_printer('llvm::SmallString', '^llvm::SmallString<.*>$', SmallStringPrinte
 pp.add_printer('llvm::StringRef', '^llvm::StringRef$', StringRefPrinter)
 pp.add_printer('llvm::SmallVectorImpl', '^llvm::SmallVector(Impl)?<.*>$', SmallVectorPrinter)
 pp.add_printer('llvm::ArrayRef', '^llvm::(Const)?ArrayRef<.*>$', ArrayRefPrinter)
+pp.add_printer('llvm::Expected', '^llvm::Expected<.*>$', ExpectedPrinter)
 pp.add_printer('llvm::Optional', '^llvm::Optional<.*>$', OptionalPrinter)
 pp.add_printer('llvm::DenseMap', '^llvm::DenseMap<.*>$', DenseMapPrinter)
 pp.add_printer('llvm::Twine', '^llvm::Twine$', TwinePrinter)
diff --git a/utils/indirect_calls.py b/utils/indirect_calls.py
new file mode 100755
index 000000000000..b7349a6d884f
--- /dev/null
+++ b/utils/indirect_calls.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+"""A tool for looking for indirect jumps and calls in x86 binaries.
+
+   Helpful to verify whether or not retpoline mitigations are catching
+   all of the indirect branches in a binary and telling you which
+   functions the remaining ones are in (assembly, etc).
+
+   Depends on llvm-objdump being in your path and is tied to the
+   dump format.
+"""
+
+import os
+import sys
+import re
+import subprocess
+import optparse
+
+# Look for indirect calls/jmps in a binary. re: (call|jmp).*\*
+def look_for_indirect(file):
+    """Disassemble `file` with llvm-objdump -d and print indirect calls/jmps.
+
+    Each match is preceded by the last line that did not start with a space.
+    """
+    args = ['llvm-objdump', '-d', file]
+    # universal_newlines=True: communicate() returns text, not bytes, on Python 3.
+    p = subprocess.Popen(args=args, stdin=None, stderr=subprocess.PIPE,
+                         stdout=subprocess.PIPE, universal_newlines=True)
+    (stdout, stderr) = p.communicate()
+    function = ""
+    for line in stdout.splitlines():
+        if not line.startswith(' '):
+            function = line
+        if re.search(r'(call|jmp).*\*', line) is not None:
+            # TODO: Perhaps use cxxfilt to demangle functions?
+            print(function)
+            print(line)
+
+def main(args):
+    # No options currently other than the binary (args[0] is the program name).
+    parser = optparse.OptionParser("%prog [options] <binary>")
+    (opts, args) = parser.parse_args(args)
+    if len(args) != 2:
+        parser.error("invalid number of arguments: %s" % len(args))
+    look_for_indirect(args[1])
+
+if __name__ == '__main__':
+    main(sys.argv)
diff --git a/utils/kate/llvm-tablegen.xml b/utils/kate/llvm-tablegen.xml
new file mode 100644
index 000000000000..ccca1d3cf14e
--- /dev/null
+++ b/utils/kate/llvm-tablegen.xml
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE language SYSTEM "language.dtd">
+<language name="LLVM TableGen" section="Sources" version="1.00" kateversion="3.4.4" extensions="*.td" mimetype="" author="LLVM Team" license="LLVM Release License">
+  <highlighting>
+    <list name="keywords">
+      <item> foreach </item>
+      <item> field </item>
+    </list>
+    <list name="operator-keywords">
+      <item> in </item>
+    </list>
+    <list name="bang-operators">
+      <item> !eq </item>
+      <item> !if </item>
+      <item> !head </item>
+      <item> !tail </item>
+      <item> !con </item>
+      <item> !add </item>
+      <item> !shl </item>
+      <item> !sra </item>
+      <item> !srl </item>
+      <item> !and </item>
+      <item> !or </item>
+      <item> !empty </item>
+      <item> !subst </item>
+      <item> !foreach </item>
+      <item> !strconcat </item>
+      <item> !cast </item>
+      <item> !listconcat </item>
+      <item> !size </item>
+      <item> !foldl </item>
+      <item> !isa </item>
+      <item> !dag </item>
+      <item> !le </item>
+      <item> !lt </item>
+      <item> !ge </item>
+      <item> !gt </item>
+      <item> !ne </item>
+    </list>
+    <list name="objects">
+      <item> class </item>
+      <item> def </item>
+      <item> defm </item>
+      <item> defset </item>
+      <item> let </item>
+      <item> multiclass </item>
+    </list>
+    <list name="class-like">
+      <item> class </item>
+      <item> def </item>
+      <item> defm </item>
+      <item> defset </item>
+      <item> multiclass </item>
+    </list>
+    <list name="variable-like">
+      <item> let </item>
+    </list>
+    <list name="types">
+      <item> string </item>
+      <item> code </item>
+      <item> bit </item>
+      <item> int </item>
+      <item> dag </item>
+      <item> bits </item>
+      <item> list </item>
+    </list>
+    <contexts>
+      <context name="llvm tablegen" attribute="Normal Text" lineEndContext="#stay">
+        <DetectSpaces/>
+        <Detect2Chars attribute="Comment" context="Single-line comment" char="/" char1="/"/>
+        <Detect2Chars attribute="Comment" context="Multi-line comment" char="/" char1="*" beginRegion="Comment"/>
+        <keyword attribute="Keyword" context="Definition" String="class-like"/>
+        <keyword attribute="Keyword" context="Let expression" String="variable-like"/>
+        <keyword attribute="Keyword" String="objects"/>
+        <keyword attribute="Keyword" String="keywords"/>
+        <keyword attribute="Keyword" String="operator-keywords"/>
+        <keyword attribute="Keyword" String="bang-operators"/>
+        <keyword attribute="Data Type" String="types"/>
+        <DetectChar attribute="Symbol" context="#stay" char="{" beginRegion="Brace1"/>
+        <DetectChar attribute="Symbol" context="#stay" char="}" endRegion="Brace1"/>
+        <Int attribute="Int" context="#stay"/>
+        <RegExpr attribute="Normal Text" String="[a-zA-Z_][a-zA-Z_0-9]{2,}" context="#stay"/>
+      </context>
+      <context attribute="Comment" lineEndContext="#pop" name="Single-line comment">
+        <IncludeRules context="##Alerts"/>
+        <LineContinue attribute="Comment" context="#stay"/>
+      </context>
+      <context attribute="Comment" lineEndContext="#stay" name="Multi-line comment">
+        <Detect2Chars attribute="Comment" context="Multi-line comment" char="/" char1="*" beginRegion="Comment"/>
+        <IncludeRules context="##Alerts"/>
+        <Detect2Chars attribute="Comment" context="#pop" char="*" char1="/" endRegion="Comment"/>
+      </context>
+      <context attribute="Normal Text" lineEndContext="#stay" name="Definition">
+        <DetectSpaces/>
+        <RegExpr attribute="Type" context="#stay" String="[a-zA-Z0-9_]+"/>
+        <DetectSpaces/>
+        <DetectChar char=":" />
+        <DetectSpaces/>
+        <RegExpr attribute="Type" context="#stay" String="[a-zA-Z0-9_]+"/>
+        <AnyChar context="#pop" lookAhead="true" String=" :;{&lt;" />
+      </context>
+      <context attribute="Normal Text" lineEndContext="#stay" name="Let expression">
+        <DetectSpaces/>
+        <RegExpr attribute="Variable" context="#pop" String="[a-zA-Z0-9_]+"/>
+      </context>
+    </contexts>
+    <itemDatas>
+      <itemData name="Normal Text" defStyleNum="dsNormal" spellChecking="false"/>
+      <itemData name="Operator" defStyleNum="dsOperator" spellChecking="false"/>
+      <itemData name="Keyword" defStyleNum="dsKeyword" spellChecking="false"/>
+      <itemData name="Data Type" defStyleNum="dsDataType" spellChecking="false"/>
+      <itemData name="Type" defStyleNum="dsDataType" spellChecking="false"/>
+      <itemData name="Variable" defStyleNum="dsVariable" spellChecking="false"/>
+      <itemData name="Int" defStyleNum="dsDecVal"/>
+      <itemData name="Hex" defStyleNum="dsBaseN"/>
+      <itemData name="Float" defStyleNum="dsFloat"/>
+      <itemData name="String" defStyleNum="dsString"/>
+      <itemData name="Comment" defStyleNum="dsComment"/>
+      <itemData name="Function" defStyleNum="dsFunction"/>
+      <itemData name="Symbol" defStyleNum="dsNormal" spellChecking="false"/>
+      <itemData name="SPECIAL" defStyleNum="dsAlert" strikeout="true" bold="true" underline="true" italic="true"/>
+    </itemDatas>
+  </highlighting>
+  <general>
+    <comments>
+      <comment name="singleLine" start="//"/>
+      <comment name="multiLine" start="/*" end="*/"/>
+    </comments>
+    <keywords casesensitive="1" weakDeliminator=".!"/>
+  </general>
+</language>
+<!--
+// kate: space-indent on; indent-width 2; replace-tabs on;
+-->
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 389e5652e9be..e8fb1533a861 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -83,6 +83,8 @@ class LitConfig(object):
             Interface for setting maximum time to spend executing
             a single test
         """
+        if not isinstance(value, int):
+            self.fatal('maxIndividualTestTime must set to a value of type int.')
         self._maxIndividualTestTime = value
         if self.maxIndividualTestTime > 0:
             # The current implementation needs psutil to set
diff --git a/utils/lit/lit/Test.py b/utils/lit/lit/Test.py
index 1a9e3fe80fb3..9fa9064dc689 100644
--- a/utils/lit/lit/Test.py
+++ b/utils/lit/lit/Test.py
@@ -1,5 +1,5 @@
 import os
-from xml.sax.saxutils import escape
+from xml.sax.saxutils import quoteattr
 from json import JSONEncoder
 
 from lit.BooleanExpression import BooleanExpression
@@ -135,6 +135,8 @@ class Result(object):
         self.elapsed = elapsed
         # The metrics reported by this test.
         self.metrics = {}
+        # The micro-test results reported by this test.
+        self.microResults = {}
 
     def addMetric(self, name, value):
         """
@@ -153,6 +155,24 @@ class Result(object):
             raise TypeError("unexpected metric value: %r" % (value,))
         self.metrics[name] = value
 
+    def addMicroResult(self, name, microResult):
+        """
+        addMicroResult(name, microResult)
+
+        Attach a micro-test result to the test result, with the given name and
+        result.  It is an error (ValueError) to attempt to attach a micro-test
+        with the same name multiple times.
+
+        Each micro-test result must be a Result instance, else TypeError is raised.
+        """
+        if name in self.microResults:
+            raise ValueError("Result already includes microResult for %r" % (
+                   name,))
+        if not isinstance(microResult, Result):
+            raise TypeError("unexpected MicroResult value %r" % (microResult,))
+        self.microResults[name] = microResult
+
+
 # Test classes.
 
 class TestSuite:
@@ -340,8 +360,9 @@ class Test:
         """
         return self.suite.config.is_early
 
-    def getJUnitXML(self):
-        test_name = self.path_in_suite[-1]
+    def writeJUnitXML(self, fil):
+        """Write the test's report xml representation to a file handle."""
+        test_name = quoteattr(self.path_in_suite[-1])
         test_path = self.path_in_suite[:-1]
         safe_test_path = [x.replace(".","_") for x in test_path]
         safe_name = self.suite.name.replace(".","-")
@@ -350,13 +371,28 @@ class Test:
             class_name = safe_name + "." + "/".join(safe_test_path) 
         else:
             class_name = safe_name + "." + safe_name
-
-        xml = "<testcase classname='" + class_name + "' name='" + \
-            test_name + "'"
-        xml += " time='%.2f'" % (self.result.elapsed,)
+        class_name = quoteattr(class_name)
+        testcase_template = '<testcase classname={class_name} name={test_name} time="{time:.2f}"'
+        elapsed_time = self.result.elapsed if self.result.elapsed is not None else 0.0
+        testcase_xml = testcase_template.format(class_name=class_name, test_name=test_name, time=elapsed_time)
+        fil.write(testcase_xml)
         if self.result.code.isFailure:
-            xml += ">\n\t<failure >\n" + escape(self.result.output)
-            xml += "\n\t</failure>\n</testcase>"
+            fil.write(">\n\t<failure ><![CDATA[")
+            # The 'unicode' type only exists on Python 2; encode such output to UTF-8 there.
+            encoded_output = self.result.output
+            if str is bytes and isinstance(encoded_output, type(u"")):
+                encoded_output = encoded_output.encode("utf-8", 'ignore')
+            # In the unlikely case that the output contains the CDATA terminator
+            # we wrap it by creating a new CDATA block
+            fil.write(encoded_output.replace("]]>", "]]]]><![CDATA[>"))
+            fil.write("]]></failure>\n</testcase>")
+        elif self.result.code == UNSUPPORTED:
+            unsupported_features = self.getMissingRequiredFeatures()
+            if unsupported_features:
+                skip_message = "Skipping because of: " + ", ".join(unsupported_features)
+            else:
+                skip_message = "Skipping because of configuration."
+
+            fil.write(">\n\t<skipped message={} />\n</testcase>\n".format(quoteattr(skip_message)))
         else:
-            xml += "/>"
-        return xml
+            fil.write("/>")
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index a49e1536860d..e304381ff47a 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import
 import difflib
 import errno
 import functools
+import io
 import itertools
 import getopt
 import os, signal, subprocess, sys
@@ -12,6 +13,7 @@ import shutil
 import tempfile
 import threading
 
+import io
 try:
     from StringIO import StringIO
 except ImportError:
@@ -36,6 +38,18 @@ kUseCloseFDs = not kIsWindows
 
 # Use temporary files to replace /dev/null on Windows.
 kAvoidDevNull = kIsWindows
+kDevNull = "/dev/null"
+
+# A regex that matches %dbg(ARG), which lit inserts at the beginning of each
+# run command pipeline such that ARG specifies the pipeline's source line
+# number.  lit later expands each %dbg(ARG) to a command that behaves as a null
+# command in the target shell so that the line number is seen in lit's verbose
+# mode.
+#
+# This regex captures ARG.  ARG must not contain a right parenthesis, which
+# terminates %dbg.  ARG must not contain quotes, in which ARG might be enclosed
+# during expansion.
+kPdbgRegex = '%dbg\(([^)\'"]*)\)'
 
 class ShellEnvironment(object):
 
@@ -156,7 +170,7 @@ def executeShCmd(cmd, shenv, results, timeout=0):
 
 def expand_glob(arg, cwd):
     if isinstance(arg, GlobItem):
-        return arg.resolve(cwd)
+        return sorted(arg.resolve(cwd))
     return [arg]
 
 def expand_glob_expressions(args, cwd):
@@ -346,14 +360,15 @@ def executeBuiltinDiff(cmd, cmd_shenv):
     """executeBuiltinDiff - Compare files line by line."""
     args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
     try:
-        opts, args = getopt.gnu_getopt(args, "wbu", ["strip-trailing-cr"])
+        opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"])
     except getopt.GetoptError as err:
         raise InternalShellError(cmd, "Unsupported: 'diff':  %s" % str(err))
 
-    filelines, filepaths = ([] for i in range(2))
+    filelines, filepaths, dir_trees = ([] for i in range(3))
     ignore_all_space = False
     ignore_space_change = False
     unified_diff = False
+    recursive_diff = False
     strip_trailing_cr = False
     for o, a in opts:
         if o == "-w":
@@ -362,6 +377,8 @@ def executeBuiltinDiff(cmd, cmd_shenv):
             ignore_space_change = True
         elif o == "-u":
             unified_diff = True
+        elif o == "-r":
+            recursive_diff = True
         elif o == "--strip-trailing-cr":
             strip_trailing_cr = True
         else:
@@ -370,17 +387,70 @@ def executeBuiltinDiff(cmd, cmd_shenv):
     if len(args) != 2:
         raise InternalShellError(cmd, "Error:  missing or extra operand")
 
-    stderr = StringIO()
-    stdout = StringIO()
-    exitCode = 0
-    try:
-        for file in args:
-            if not os.path.isabs(file):
-                file = os.path.realpath(os.path.join(cmd_shenv.cwd, file))
-            filepaths.append(file)
-            with open(file, 'r') as f:
+    def getDirTree(path, basedir=""):
+        # Tree is a tuple of form (dirname, child_trees).
+        # An empty dir has child_trees = [], a file has child_trees = None.
+        # os.walk yields nothing for a non-directory, so such a path falls
+        # through the loop and is reported as (path, None) instead of None.
+        for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)):
+            child_trees = [getDirTree(d, dirname) for d in child_dirs]
+            child_trees += [(filename, None) for filename in files]
+            return path, sorted(child_trees)
+        return path, None
+
+    def compareTwoFiles(filepaths):
+        # Probe how both files can be read: default text mode first, then
+        # UTF-8 text, and finally raw bytes if neither decoding works.
+        compare_bytes = False
+        encoding = None
+        for file in filepaths:
+            try:
+                with open(file, 'r') as f:
+                    f.readlines()
+            except UnicodeDecodeError:
+                try:
+                    with io.open(file, 'r', encoding="utf-8") as f:
+                        f.readlines()
+                    encoding = "utf-8"
+                except UnicodeDecodeError:
+                    compare_bytes = True
+
+        if compare_bytes:
+            return compareTwoBinaryFiles(filepaths)
+        return compareTwoTextFiles(filepaths, encoding)
+
+    def compareTwoBinaryFiles(filepaths):
+        filelines = []
+        for file in filepaths:
+            with open(file, 'rb') as f:
                 filelines.append(f.readlines())
 
+        exitCode = 0
+        if hasattr(difflib, 'diff_bytes'):
+            # python 3.5 or newer
+            diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
+            diffs = [diff.decode() for diff in diffs]
+        else:
+            # python 2.7
+            func = difflib.unified_diff if unified_diff else difflib.context_diff
+            diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1])
+
+        for diff in diffs:
+            stdout.write(diff)
+            exitCode = 1
+        return exitCode
+
+    def compareTwoTextFiles(filepaths, encoding):
+        filelines = []
+        for file in filepaths:
+            if encoding is None:
+                with open(file, 'r') as f:
+                    filelines.append(f.readlines())
+            else:
+                with io.open(file, 'r', encoding=encoding) as f:
+                    filelines.append(f.readlines())
+
+        exitCode = 0
         def compose2(f, g):
             return lambda x: f(g(x))
 
@@ -399,6 +469,99 @@ def executeBuiltinDiff(cmd, cmd_shenv):
         for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]):
             stdout.write(diff)
             exitCode = 1
+        return exitCode
+
+    def printDirVsFile(dir_path, file_path):
+        if os.path.getsize(file_path):
+            msg = "File %s is a directory while file %s is a regular file"
+        else:
+            msg = "File %s is a directory while file %s is a regular empty file"
+        stdout.write(msg % (dir_path, file_path) + "\n")
+
+    def printFileVsDir(file_path, dir_path):
+        if os.path.getsize(file_path):
+            msg = "File %s is a regular file while file %s is a directory"
+        else:
+            msg = "File %s is a regular empty file while file %s is a directory"
+        stdout.write(msg % (file_path, dir_path) + "\n")
+
+    def printOnlyIn(basedir, path, name):
+        stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name))
+
+    def compareDirTrees(dir_trees, base_paths=["", ""]):
+        # Dirnames of the trees are not checked, it's caller's responsibility,
+        # as top-level dirnames are always different. Base paths are important
+        # for doing os.walk, but we don't put it into tree's dirname in order
+        # to speed up string comparison below and while sorting in getDirTree.
+        left_tree, right_tree = dir_trees[0], dir_trees[1]
+        left_base, right_base = base_paths[0], base_paths[1]
+
+        # Compare two files or report file vs. directory mismatch.
+        if left_tree[1] is None and right_tree[1] is None:
+            return compareTwoFiles([os.path.join(left_base, left_tree[0]),
+                                    os.path.join(right_base, right_tree[0])])
+
+        if left_tree[1] is None and right_tree[1] is not None:
+            printFileVsDir(os.path.join(left_base, left_tree[0]),
+                           os.path.join(right_base, right_tree[0]))
+            return 1
+
+        if left_tree[1] is not None and right_tree[1] is None:
+            printDirVsFile(os.path.join(left_base, left_tree[0]),
+                           os.path.join(right_base, right_tree[0]))
+            return 1
+
+        # Compare two directories via recursive use of compareDirTrees.
+        exitCode = 0
+        left_names = [node[0] for node in left_tree[1]]
+        right_names = [node[0] for node in right_tree[1]]
+        l, r = 0, 0
+        while l < len(left_names) and r < len(right_names):
+            # Names are sorted in getDirTree, rely on that order.
+            if left_names[l] < right_names[r]:
+                exitCode = 1
+                printOnlyIn(left_base, left_tree[0], left_names[l])
+                l += 1
+            elif left_names[l] > right_names[r]:
+                exitCode = 1
+                printOnlyIn(right_base, right_tree[0], right_names[r])
+                r += 1
+            else:
+                exitCode |= compareDirTrees([left_tree[1][l], right_tree[1][r]],
+                                            [os.path.join(left_base, left_tree[0]),
+                                            os.path.join(right_base, right_tree[0])])
+                l += 1
+                r += 1
+
+        # At least one of the trees has ended. Report names from the other tree.
+        while l < len(left_names):
+            exitCode = 1
+            printOnlyIn(left_base, left_tree[0], left_names[l])
+            l += 1
+        while r < len(right_names):
+            exitCode = 1
+            printOnlyIn(right_base, right_tree[0], right_names[r])
+            r += 1
+        return exitCode
+
+    stderr = StringIO()
+    stdout = StringIO()
+    exitCode = 0
+    try:
+        for file in args:
+            if not os.path.isabs(file):
+                file = os.path.realpath(os.path.join(cmd_shenv.cwd, file))
+    
+            if recursive_diff:
+                dir_trees.append(getDirTree(file))
+            else:
+                filepaths.append(file)
+
+        if not recursive_diff:
+            exitCode = compareTwoFiles(filepaths)
+        else:
+            exitCode = compareDirTrees(dir_trees)
+
     except IOError as err:
         stderr.write("Error: 'diff' command failed, %s\n" % str(err))
         exitCode = 1
@@ -523,7 +686,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
            raise InternalShellError(cmd, "Unsupported: glob in "
                                     "redirect expanded to multiple files")
         name = name[0]
-        if kAvoidDevNull and name == '/dev/null':
+        if kAvoidDevNull and name == kDevNull:
             fd = tempfile.TemporaryFile(mode=mode)
         elif kIsWindows and name == '/dev/tty':
             # Simulate /dev/tty on Windows.
@@ -637,11 +800,20 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
         results.append(cmdResult)
         return cmdResult.exitCode
 
+    if cmd.commands[0].args[0] == ':':
+        if len(cmd.commands) != 1:
+            raise InternalShellError(cmd.commands[0], "Unsupported: ':' "
+                                     "cannot be part of a pipeline")
+        results.append(ShellCommandResult(cmd.commands[0], '', '', 0, False))
+        return 0;
+
     procs = []
     default_stdin = subprocess.PIPE
     stderrTempFiles = []
     opened_files = []
     named_temp_files = []
+    builtin_commands = set(['cat'])
+    builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands")
     # To avoid deadlock, we use a single stderr stream for piped
     # output. This is null until we have seen some output using
     # stderr.
@@ -677,27 +849,38 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
         # Resolve the executable path ourselves.
         args = list(j.args)
         executable = None
-        # For paths relative to cwd, use the cwd of the shell environment.
-        if args[0].startswith('.'):
-            exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
-            if os.path.isfile(exe_in_cwd):
-                executable = exe_in_cwd
-        if not executable:
-            executable = lit.util.which(args[0], cmd_shenv.env['PATH'])
-        if not executable:
-            raise InternalShellError(j, '%r: command not found' % j.args[0])
+        is_builtin_cmd = args[0] in builtin_commands;
+        if not is_builtin_cmd:
+            # For paths relative to cwd, use the cwd of the shell environment.
+            if args[0].startswith('.'):
+                exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
+                if os.path.isfile(exe_in_cwd):
+                    executable = exe_in_cwd
+            if not executable:
+                executable = lit.util.which(args[0], cmd_shenv.env['PATH'])
+            if not executable:
+                raise InternalShellError(j, '%r: command not found' % j.args[0])
 
         # Replace uses of /dev/null with temporary files.
         if kAvoidDevNull:
+            # In Python 2.x, basestring is the base class for all strings (including unicode).
+            # In Python 3.x, basestring no longer exists and str is always unicode.
+            try:
+                str_type = basestring
+            except NameError:
+                str_type = str
             for i,arg in enumerate(args):
-                if arg == "/dev/null":
+                if isinstance(arg, str_type) and kDevNull in arg:
                     f = tempfile.NamedTemporaryFile(delete=False)
                     f.close()
                     named_temp_files.append(f.name)
-                    args[i] = f.name
+                    args[i] = arg.replace(kDevNull, f.name)
 
         # Expand all glob expressions
         args = expand_glob_expressions(args, cmd_shenv.cwd)
+        if is_builtin_cmd:
+            args.insert(0, "python")
+            args[1] = os.path.join(builtin_commands_dir ,args[1] + ".py")
 
         # On Windows, do our own command line quoting for better compatibility
         # with some core utility distributions.
@@ -757,11 +940,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
         f.seek(0, 0)
         procData[i] = (procData[i][0], f.read())
 
-    def to_string(bytes):
-        if isinstance(bytes, str):
-            return bytes
-        return bytes.encode('utf-8')
-
     exitCode = None
     for i,(out,err) in enumerate(procData):
         res = procs[i].wait()
@@ -822,7 +1000,8 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
 
 def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
     cmds = []
-    for ln in commands:
+    for i, ln in enumerate(commands):
+        ln = commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
         try:
             cmds.append(ShUtil.ShParser(ln, litConfig.isWindows,
                                         test.config.pipefail).parse())
@@ -906,9 +1085,16 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
       mode += 'b'  # Avoid CRLFs when writing bash scripts.
     f = open(script, mode)
     if isWin32CMDEXE:
-        f.write('@echo off\n')
-        f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
+        for i, ln in enumerate(commands):
+            commands[i] = re.sub(kPdbgRegex, "echo '\\1' > nul && ", ln)
+        if litConfig.echo_all_commands:
+            f.write('@echo on\n')
+        else:
+            f.write('@echo off\n')
+        f.write('\n@if %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
     else:
+        for i, ln in enumerate(commands):
+            commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
         if test.config.pipefail:
             f.write('set -o pipefail;')
         if litConfig.echo_all_commands:
@@ -1141,7 +1327,9 @@ class IntegratedTestKeywordParser(object):
         self.parser = parser
 
         if kind == ParserKind.COMMAND:
-            self.parser = self._handleCommand
+            self.parser = lambda line_number, line, output: \
+                                 self._handleCommand(line_number, line, output,
+                                                     self.keyword)
         elif kind == ParserKind.LIST:
             self.parser = self._handleList
         elif kind == ParserKind.BOOLEAN_EXPR:
@@ -1172,7 +1360,7 @@ class IntegratedTestKeywordParser(object):
         return (not line.strip() or output)
 
     @staticmethod
-    def _handleCommand(line_number, line, output):
+    def _handleCommand(line_number, line, output, keyword):
         """A helper for parsing COMMAND type keywords"""
         # Trim trailing whitespace.
         line = line.rstrip()
@@ -1191,6 +1379,14 @@ class IntegratedTestKeywordParser(object):
         else:
             if output is None:
                 output = []
+            pdbg = "%dbg({keyword} at line {line_number})".format(
+                keyword=keyword,
+                line_number=line_number)
+            assert re.match(kPdbgRegex + "$", pdbg), \
+                   "kPdbgRegex expected to match actual %dbg usage"
+            line = "{pdbg} {real_command}".format(
+                pdbg=pdbg,
+                real_command=line)
             output.append(line)
         return output
 
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index c729ec060ace..e2ac73b0b426 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -22,10 +22,11 @@ class TestingConfig:
             }
 
         pass_vars = ['LIBRARY_PATH', 'LD_LIBRARY_PATH', 'SYSTEMROOT', 'TERM',
-                     'LD_PRELOAD', 'ASAN_OPTIONS', 'UBSAN_OPTIONS',
+                     'CLANG', 'LD_PRELOAD', 'ASAN_OPTIONS', 'UBSAN_OPTIONS',
                      'LSAN_OPTIONS', 'ADB', 'ANDROID_SERIAL',
                      'SANITIZER_IGNORE_CVE_2016_2143', 'TMPDIR', 'TMP', 'TEMP',
-                     'TEMPDIR', 'AVRLIT_BOARD', 'AVRLIT_PORT']
+                     'TEMPDIR', 'AVRLIT_BOARD', 'AVRLIT_PORT',
+                     'FILECHECK_DUMP_INPUT_ON_FAILURE']
         for var in pass_vars:
             val = os.environ.get(var, '')
             # Check for empty string as some variables such as LD_PRELOAD cannot be empty
@@ -152,3 +153,28 @@ class TestingConfig:
         else:
             return self.parent.root
 
+class SubstituteCaptures:
+    """
+    Helper class to indicate that a substitution contains back-references.
+
+    This can be used as follows in lit.cfg to mark substitutions as having
+    back-references::
+
+        config.substitutions.append(('\b[^ ]*.cpp', SubstituteCaptures('\0.txt')))
+
+    """
+    def __init__(self, substitution):
+        self.substitution = substitution
+
+    def replace(self, pattern, replacement):
+        return self.substitution
+
+    def __str__(self):
+        return self.substitution
+
+    def __len__(self):
+        return len(self.substitution)
+
+    def __getitem__(self, item):
+        return self.substitution.__getitem__(item)
+
diff --git a/utils/lit/lit/__init__.py b/utils/lit/lit/__init__.py
index cd84ea4ed6ea..cc166b2a3e3b 100644
--- a/utils/lit/lit/__init__.py
+++ b/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@minormatter.com'
-__versioninfo__ = (0, 6, 0)
+__versioninfo__ = (0, 7, 0)
 __version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
 
 __all__ = []
diff --git a/utils/lit/lit/builtin_commands/cat.py b/utils/lit/lit/builtin_commands/cat.py
new file mode 100644
index 000000000000..fab9dccda27b
--- /dev/null
+++ b/utils/lit/lit/builtin_commands/cat.py
@@ -0,0 +1,67 @@
+import getopt
+import sys
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+def convertToCaretAndMNotation(data):
+   newdata = StringIO()
+   if isinstance(data, str):
+       data = bytearray(data)
+
+   for intval in data:
+       if intval == 9 or intval == 10:
+           newdata.write(chr(intval))
+           continue
+       if intval > 127:
+           intval = intval -128
+           newdata.write("M-")
+       if intval < 32:
+           newdata.write("^")
+           newdata.write(chr(intval+64))
+       elif intval == 127:
+           newdata.write("^?")
+       else:
+           newdata.write(chr(intval))
+
+   return newdata.getvalue().encode()
+
+
+def main(argv):
+    arguments = argv[1:]
+    short_options = "v"
+    long_options = ["show-nonprinting"]
+    show_nonprinting = False;
+
+    try:
+        options, filenames = getopt.gnu_getopt(arguments, short_options, long_options)
+    except getopt.GetoptError as err:
+        sys.stderr.write("Unsupported: 'cat':  %s\n" % str(err))
+        sys.exit(1)
+
+    for option, value in options:
+        if option == "-v" or option == "--show-nonprinting":
+            show_nonprinting = True;
+
+    writer = getattr(sys.stdout, 'buffer', None)
+    if writer is None:
+        writer = sys.stdout
+        if sys.platform == "win32":
+            import os, msvcrt
+            msvcrt.setmode(sys.stdout.fileno(),os.O_BINARY)
+    for filename in filenames:
+        try:
+            fileToCat = open(filename,"rb")
+            contents = fileToCat.read()
+            if show_nonprinting:
+                contents = convertToCaretAndMNotation(contents)
+            writer.write(contents)
+            sys.stdout.flush()
+            fileToCat.close()
+        except IOError as error:
+            sys.stderr.write(str(error))
+            sys.exit(1)
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/utils/lit/lit/llvm/config.py b/utils/lit/lit/llvm/config.py
index 554da93f110b..2257fb6db679 100644
--- a/utils/lit/lit/llvm/config.py
+++ b/utils/lit/lit/llvm/config.py
@@ -101,10 +101,6 @@ class LLVMConfig(object):
                 self.with_environment(
                     'DYLD_INSERT_LIBRARIES', gmalloc_path_str)
 
-        breaking_checks = getattr(config, 'enable_abi_breaking_checks', None)
-        if lit.util.pythonize_bool(breaking_checks):
-            features.add('abi-breaking-checks')
-
     def with_environment(self, variable, value, append_path=False):
         if append_path:
             # For paths, we should be able to take a list of them and process all
@@ -392,7 +388,7 @@ class LLVMConfig(object):
         builtin_include_dir = self.get_clang_builtin_include_dir(self.config.clang)
         tool_substitutions = [
             ToolSubst('%clang', command=self.config.clang),
-            ToolSubst('%clang_analyze_cc1', command='%clang_cc1', extra_args=['-analyze']),
+            ToolSubst('%clang_analyze_cc1', command='%clang_cc1', extra_args=['-analyze', '%analyze']),
             ToolSubst('%clang_cc1', command=self.config.clang, extra_args=['-cc1', '-internal-isystem', builtin_include_dir, '-nostdsysteminc']),
             ToolSubst('%clang_cpp', command=self.config.clang, extra_args=['--driver-mode=cpp']),
             ToolSubst('%clang_cl', command=self.config.clang, extra_args=['--driver-mode=cl']),
@@ -465,9 +461,6 @@ class LLVMConfig(object):
         self.with_environment('PATH', tool_dirs, append_path=True)
         self.with_environment('LD_LIBRARY_PATH', lib_dirs, append_path=True)
 
-        self.config.substitutions.append(
-            (r"\bld.lld\b", 'ld.lld --full-shutdown'))
-
-        tool_patterns = ['ld.lld', 'lld-link', 'lld']
+        tool_patterns = ['lld', 'ld.lld', 'lld-link', 'ld64.lld', 'wasm-ld']
 
         self.add_tool_substitutions(tool_patterns, tool_dirs)
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 95a5500a504c..807360ac5b9d 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -16,6 +16,7 @@ import time
 import argparse
 import tempfile
 import shutil
+from xml.sax.saxutils import quoteattr
 
 import lit.ProgressBar
 import lit.LitConfig
@@ -81,6 +82,18 @@ class TestingProgressDisplay(object):
                 print('%s: %s ' % (metric_name, value.format()))
             print("*" * 10)
 
+        # Report micro-tests, if present
+        if test.result.microResults:
+            items = sorted(test.result.microResults.items())
+            for micro_test_name, micro_test in items:
+                print("%s MICRO-TEST: %s" %
+                         ('*'*3, micro_test_name))
+   
+                if micro_test.metrics:
+                    sorted_metrics = sorted(micro_test.metrics.items())
+                    for metric_name, value in sorted_metrics:
+                        print('    %s:  %s ' % (metric_name, value.format()))
+
         # Ensure the output is flushed.
         sys.stdout.flush()
 
@@ -113,6 +126,25 @@ def write_test_results(run, lit_config, testing_time, output_path):
             for key, value in test.result.metrics.items():
                 metrics_data[key] = value.todata()
 
+        # Report micro-tests separately, if present
+        if test.result.microResults:
+            for key, micro_test in test.result.microResults.items():
+                # Expand parent test name with micro test name
+                parent_name = test.getFullName()
+                micro_full_name = parent_name + ':' + key
+
+                micro_test_data = {
+                    'name' : micro_full_name,
+                    'code' : micro_test.code.name,
+                    'output' : micro_test.output,
+                    'elapsed' : micro_test.elapsed }
+                if micro_test.metrics:
+                    micro_test_data['metrics'] = micro_metrics_data = {}
+                    for key, value in micro_test.metrics.items():
+                        micro_metrics_data[key] = value.todata()
+
+                tests_data.append(micro_test_data)
+
         tests_data.append(test_data)
 
     # Write the output.
@@ -566,24 +598,30 @@ def main_with_tmp(builtinParameters):
                 by_suite[suite] = {
                                    'passes'   : 0,
                                    'failures' : 0,
+                                   'skipped': 0,
                                    'tests'    : [] }
             by_suite[suite]['tests'].append(result_test)
             if result_test.result.code.isFailure:
                 by_suite[suite]['failures'] += 1
+            elif result_test.result.code == lit.Test.UNSUPPORTED:
+                by_suite[suite]['skipped'] += 1
             else:
                 by_suite[suite]['passes'] += 1
         xunit_output_file = open(opts.xunit_output_file, "w")
         xunit_output_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n")
         xunit_output_file.write("<testsuites>\n")
         for suite_name, suite in by_suite.items():
-            safe_suite_name = suite_name.replace(".", "-")
-            xunit_output_file.write("<testsuite name='" + safe_suite_name + "'")
-            xunit_output_file.write(" tests='" + str(suite['passes'] + 
-              suite['failures']) + "'")
-            xunit_output_file.write(" failures='" + str(suite['failures']) + 
-              "'>\n")
+            safe_suite_name = quoteattr(suite_name.replace(".", "-"))
+            xunit_output_file.write("<testsuite name=" + safe_suite_name)
+            xunit_output_file.write(" tests=\"" + str(suite['passes'] +
+              suite['failures'] + suite['skipped']) + "\"")
+            xunit_output_file.write(" failures=\"" + str(suite['failures']) + "\"")
+            xunit_output_file.write(" skipped=\"" + str(suite['skipped']) +
+              "\">\n")
+
             for result_test in suite['tests']:
-                xunit_output_file.write(result_test.getJUnitXML() + "\n")
+                result_test.writeJUnitXML(xunit_output_file)
+                xunit_output_file.write("\n")
             xunit_output_file.write("</testsuite>\n")
         xunit_output_file.write("</testsuites>")
         xunit_output_file.close()
diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py
index 5f20262d4c35..e20c4ab90153 100644
--- a/utils/lit/lit/util.py
+++ b/utils/lit/lit/util.py
@@ -194,7 +194,7 @@ def which(command, paths=None):
         paths = os.environ.get('PATH', '')
 
     # Check for absolute match first.
-    if os.path.isfile(command):
+    if os.path.isabs(command) and os.path.isfile(command):
         return os.path.normpath(command)
 
     # Would be nice if Python had a lib function for this.
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/basic.txt b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/basic.txt
new file mode 100644
index 000000000000..a359c996660d
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/basic.txt
@@ -0,0 +1,6 @@
+# These commands must run under both bash and windows cmd.exe (with GnuWin32
+# tools).
+
+# RUN: true
+# RUN: false
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/line-continuation.txt b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/line-continuation.txt
new file mode 100644
index 000000000000..b11f0d759fe7
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/line-continuation.txt
@@ -0,0 +1,12 @@
+# These commands must run under both bash and windows cmd.exe (with GnuWin32
+# tools).
+
+# RUN: echo 'foo bar' \
+# RUN: | FileCheck %s
+# RUN: echo \
+# RUN: 'foo baz' \
+# RUN: | FileCheck %s
+# RUN: echo 'foo bar' \
+# RUN: | FileCheck %s
+
+# CHECK: foo bar
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/lit.local.cfg b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/lit.local.cfg
new file mode 100644
index 000000000000..5e87c7299198
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/external-shell/lit.local.cfg
@@ -0,0 +1,2 @@
+import lit.formats
+config.test_format = lit.formats.ShTest(execute_external=True)
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/basic.txt b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/basic.txt
new file mode 100644
index 000000000000..ba2695431bd9
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/basic.txt
@@ -0,0 +1,3 @@
+# RUN: true
+# RUN: false
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/line-continuation.txt b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/line-continuation.txt
new file mode 100644
index 000000000000..1e00bcb96163
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/line-continuation.txt
@@ -0,0 +1,11 @@
+# RUN: : first line continued \
+# RUN:   to second line
+# RUN: echo 'foo bar' \
+# RUN: | FileCheck %s
+# RUN: echo \
+# RUN: 'foo baz' \
+# RUN: | FileCheck %s
+# RUN: echo 'foo bar' \
+# RUN: | FileCheck %s
+
+# CHECK: foo bar
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/lit.local.cfg b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/lit.local.cfg
new file mode 100644
index 000000000000..b76b7a24c99e
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/internal-shell/lit.local.cfg
@@ -0,0 +1,2 @@
+import lit.formats
+config.test_format = lit.formats.ShTest(execute_external=False)
diff --git a/utils/lit/tests/Inputs/shtest-run-at-line/lit.cfg b/utils/lit/tests/Inputs/shtest-run-at-line/lit.cfg
new file mode 100644
index 000000000000..f4c7921b7326
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-run-at-line/lit.cfg
@@ -0,0 +1,2 @@
+config.name = 'shtest-run-at-line'
+config.suffixes = ['.txt']
diff --git a/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt b/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt
new file mode 100644
index 000000000000..690bc1e9e928
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt
@@ -0,0 +1,3 @@
+# Check error on an unsupported option for cat.
+#
+# RUN: cat -b temp1.txt
diff --git a/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt b/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt
new file mode 100644
index 000000000000..5344707e9664
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt
@@ -0,0 +1,3 @@
+# Check error on an unsupported cat (unable to find input file).
+#
+# RUN: cat temp1.txt
diff --git a/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin b/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin
new file mode 100644
index 000000000000..b4af9817b357
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin
diff --git a/utils/lit/tests/Inputs/shtest-shell/check_args.py b/utils/lit/tests/Inputs/shtest-shell/check_args.py
new file mode 100644
index 000000000000..2f7a2503b976
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/check_args.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+
+import argparse
+import platform
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--my_arg", "-a")
+
+args = parser.parse_args()
+
+answer = (platform.system() == "Windows" and
+          args.my_arg == "/dev/null" and "ERROR") or "OK"
+
+print(answer)
diff --git a/utils/lit/tests/Inputs/shtest-shell/colon-error.txt b/utils/lit/tests/Inputs/shtest-shell/colon-error.txt
new file mode 100644
index 000000000000..8b84c08ce099
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/colon-error.txt
@@ -0,0 +1,3 @@
+# Check error on an unsupported ":" (cannot be part of a pipeline).
+#
+# RUN: : | echo "hello"
diff --git a/utils/lit/tests/Inputs/shtest-shell/dev-null.txt b/utils/lit/tests/Inputs/shtest-shell/dev-null.txt
new file mode 100644
index 000000000000..5b742489cc80
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/dev-null.txt
@@ -0,0 +1,14 @@
+# Check handling of /dev/null in command line options.
+# On Windows, it should be redirected to a temp file.
+#
+# RUN: "%{python}" %S/check_args.py --my_arg /dev/null | FileCheck %s --check-prefix=CHECK1
+# CHECK1: OK
+
+# RUN: "%{python}" %S/check_args.py --my_arg=/dev/null | FileCheck %s --check-prefix=CHECK2
+# CHECK2: OK
+
+# RUN: "%{python}" %S/check_args.py -a /dev/null | FileCheck %s --check-prefix=CHECK3
+# CHECK3: OK
+
+# RUN: "%{python}" %S/check_args.py -a=/dev/null | FileCheck %s --check-prefix=CHECK4
+# CHECK4: OK
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-0.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-0.txt
new file mode 100644
index 000000000000..ae571c14f358
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-0.txt
@@ -0,0 +1,8 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Add two empty files with different names, "diff -r" should fail.
+# RUN: touch %t/dir1/dir1unique
+# RUN: touch %t/dir2/dir2unique
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-1.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-1.txt
new file mode 100644
index 000000000000..c68510ddba0f
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-1.txt
@@ -0,0 +1,9 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Same filenames in subdirs with different content, "diff -r" should fail.
+# RUN: mkdir -p %t/dir1/subdir %t/dir2/subdir
+# RUN: echo "12345" > %t/dir1/subdir/f01
+# RUN: echo "00000" > %t/dir2/subdir/f01
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-2.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-2.txt
new file mode 100644
index 000000000000..43162614654a
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-2.txt
@@ -0,0 +1,7 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# An extra file in one of the directories, "diff -r" should fail.
+# RUN: echo extra > %t/dir2/extrafile
+# RUN: diff -r %t/dir1 %t/dir2
+\ No newline at end of file
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-3.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-3.txt
new file mode 100644
index 000000000000..62cede34e823
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-3.txt
@@ -0,0 +1,7 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# An extra directory in one of the directories, "diff -r" should fail.
+# RUN: mkdir -p %t/dir1/extra_subdir
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-4.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-4.txt
new file mode 100644
index 000000000000..7abec5e64477
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-4.txt
@@ -0,0 +1,8 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Directory vs. File mismatch, "diff -r" should fail.
+# RUN: mkdir -p %t/dir1/extra_subdir
+# RUN: echo ZYX > %t/dir2/extra_subdir
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-5.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-5.txt
new file mode 100644
index 000000000000..4f752d70d8a7
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-5.txt
@@ -0,0 +1,8 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Non-empty extra directory, "diff -r" should fail.
+# RUN: mkdir -p %t/dir1/extra_subdir
+# RUN: echo ZYX > %t/dir1/extra_subdir/extra_file
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r-error-6.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-6.txt
new file mode 100644
index 000000000000..391e3517fcac
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r-error-6.txt
@@ -0,0 +1,8 @@
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Empty file vs directory mismatch, diff -r should fail.
+# RUN: echo -n > %t/dir1/extra_file
+# RUN: mkdir -p %t/dir2/extra_file
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/diff-r.txt b/utils/lit/tests/Inputs/shtest-shell/diff-r.txt
new file mode 100644
index 000000000000..c145cab6a49c
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/diff-r.txt
@@ -0,0 +1,20 @@
+# Check recursive diff ("diff -r").
+
+# Create two directories for further comparison.
+# RUN: rm -rf %t/dir1 %t/dir2
+# RUN: mkdir -p %t/dir1 %t/dir2
+
+# Create same files in both of the dirs.
+# RUN: echo "hello" > %t/dir1/f1
+# RUN: echo "hello" > %t/dir2/f1
+
+# Create same subdirectories with same contents.
+# RUN: mkdir -p %t/dir1/subdir %t/dir2/subdir
+# RUN: echo "12345" > %t/dir1/subdir/f01
+# RUN: echo "12345" > %t/dir2/subdir/f01
+# RUN: echo -e "xxx\nzzz\nyyy" > %t/dir1/subdir/f02
+# RUN: echo -e "xxx\nzzz\nyyy" > %t/dir2/subdir/f02
+
+# Create empty subdirectories with same names.
+# RUN: mkdir -p %t/dir1/empty_subdir %t/dir2/empty_subdir
+# RUN: diff -r %t/dir1 %t/dir2
diff --git a/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt b/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
index d5cbf863e735..6cc83e839cfb 100644
--- a/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
+++ b/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
@@ -85,3 +85,87 @@
 # RUN: cd %T/dir1 && echo "hello" > temp1.txt
 # RUN: cd %T/dir2 && echo "hello" > temp2.txt
 # RUN: diff temp2.txt ../dir1/temp1.txt
+#
+# Check cat command with single file.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp.write
+# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write
+# RUN: "%{python}" %S/check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s
+# FILE-EXISTS: True
+# CAT-OUTPUT: abcdefgh
+#
+# Check cat command with multiple files.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: echo "mnopqrst" > %T/testCat/temp3.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write
+# RUN: "%{python}" %S/check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s
+# MULTI-FILE-EXISTS: True
+# MULTI-CAT-OUTPUT: abcdefgh
+# MULTI-CAT-OUTPUT-NEXT: efghijkl
+# MULTI-CAT-OUTPUT-NEXT: mnopqrst
+#
+# Check cat command with multiple files and piped output to FileCheck.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s
+# PIPED-CAT-OUTPUT: abcdefgh
+# PIPED-CAT-OUTPUT-NEXT: efghijkl
+#
+# Check cat command with multiple files and glob expressions.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "cvbnm" > %T/testCat/temp1.write
+# RUN: echo "qwerty" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s
+# GLOB-CAT-OUTPUT: cvbnm
+# GLOB-CAT-OUTPUT-NEXT: qwerty
+#
+# Check cat command with -v option
+#
+# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s
+# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
+#
+# Check cat command with --show-nonprinting option
+#
+# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s
+# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
diff --git a/utils/lit/tests/Inputs/shtest-timeout/lit.cfg b/utils/lit/tests/Inputs/shtest-timeout/lit.cfg
index 81b4a12120d8..c3a1c3b96ada 100644
--- a/utils/lit/tests/Inputs/shtest-timeout/lit.cfg
+++ b/utils/lit/tests/Inputs/shtest-timeout/lit.cfg
@@ -17,9 +17,9 @@ else:
 
 configSetTimeout = lit_config.params.get('set_timeout', '0')
 
-if configSetTimeout == '1':
+if configSetTimeout != '0':
     # Try setting the max individual test time in the configuration
-    lit_config.maxIndividualTestTime = 1
+    lit_config.maxIndividualTestTime = int(configSetTimeout)
 
 config.test_format = lit.formats.ShTest(execute_external=externalShell)
 config.suffixes = ['.py']
diff --git a/utils/lit/tests/Inputs/shtest-timeout/quick_then_slow.py b/utils/lit/tests/Inputs/shtest-timeout/quick_then_slow.py
deleted file mode 100644
index b81fbe5a8bfe..000000000000
--- a/utils/lit/tests/Inputs/shtest-timeout/quick_then_slow.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# RUN: %{python} %s quick
-# RUN: %{python} %s slow
-from __future__ import print_function
-
-import time
-import sys
-
-if len(sys.argv) != 2:
-    print("Wrong number of args")
-    sys.exit(1)
-
-mode =  sys.argv[1]
-
-if mode == 'slow':
-    print("Running in slow mode")
-    sys.stdout.flush() # Make sure the print gets flushed so it appears in lit output.
-    time.sleep(6)
-    sys.exit(0)
-elif mode == 'quick':
-    print("Running in quick mode")
-    sys.exit(0)
-else:
-    print("Unrecognised mode {}".format(mode))
-    sys.exit(1)
diff --git a/utils/lit/tests/Inputs/shtest-timeout/slow.py b/utils/lit/tests/Inputs/shtest-timeout/slow.py
deleted file mode 100644
index 2dccd6331360..000000000000
--- a/utils/lit/tests/Inputs/shtest-timeout/slow.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: %{python} %s
-from __future__ import print_function
-
-import time
-import sys
-
-print("Running slow program")
-sys.stdout.flush() # Make sure the print gets flushed so it appears in lit output.
-time.sleep(6)
diff --git a/utils/lit/tests/Inputs/test-data-micro/dummy_format.py b/utils/lit/tests/Inputs/test-data-micro/dummy_format.py
new file mode 100644
index 000000000000..5842f5a5ba33
--- /dev/null
+++ b/utils/lit/tests/Inputs/test-data-micro/dummy_format.py
@@ -0,0 +1,52 @@
+import os
+try:
+    import ConfigParser
+except ImportError:
+    import configparser as ConfigParser
+
+import lit.formats
+import lit.Test
+
+class DummyFormat(lit.formats.FileBasedTest):
+    def execute(self, test, lit_config):
+        # In this dummy format, expect that each test file is actually just a
+        # .ini format dump of the results to report.
+
+        source_path = test.getSourcePath()
+
+        cfg = ConfigParser.ConfigParser()
+        cfg.read(source_path)
+
+        # Create the basic test result.
+        result_code = cfg.get('global', 'result_code')
+        result_output = cfg.get('global', 'result_output')
+        result = lit.Test.Result(getattr(lit.Test, result_code),
+                                 result_output)
+
+        # Load additional metrics.
+        for key,value_str in cfg.items('results'):
+            value = eval(value_str)
+            if isinstance(value, int):
+                metric = lit.Test.IntMetricValue(value)
+            elif isinstance(value, float):
+                metric = lit.Test.RealMetricValue(value)
+            else:
+                raise RuntimeError("unsupported result type")
+            result.addMetric(key, metric)
+
+        # Create micro test results
+        for key,micro_name in cfg.items('micro-tests'):
+            micro_result = lit.Test.Result(getattr(lit.Test, result_code, ''))
+            # Load micro test additional metrics
+            for key,value_str in cfg.items('micro-results'):
+                value = eval(value_str)
+                if isinstance(value, int):
+                    metric = lit.Test.IntMetricValue(value)
+                elif isinstance(value, float):
+                    metric = lit.Test.RealMetricValue(value)
+                else:
+                    raise RuntimeError("unsupported result type")
+                micro_result.addMetric(key, metric)
+            result.addMicroResult(micro_name, micro_result)
+
+        return result
diff --git a/utils/lit/tests/Inputs/test-data-micro/lit.cfg b/utils/lit/tests/Inputs/test-data-micro/lit.cfg
new file mode 100644
index 000000000000..3fc1e8597360
--- /dev/null
+++ b/utils/lit/tests/Inputs/test-data-micro/lit.cfg
@@ -0,0 +1,10 @@
+import site
+site.addsitedir(os.path.dirname(__file__))
+import dummy_format
+
+config.name = 'test-data-micro'
+config.suffixes = ['.ini']
+config.test_format = dummy_format.DummyFormat()
+config.test_source_root = None
+config.test_exec_root = None
+config.target_triple = None
diff --git a/utils/lit/tests/Inputs/test-data-micro/micro-tests.ini b/utils/lit/tests/Inputs/test-data-micro/micro-tests.ini
new file mode 100644
index 000000000000..1e5d76ac7aed
--- /dev/null
+++ b/utils/lit/tests/Inputs/test-data-micro/micro-tests.ini
@@ -0,0 +1,16 @@
+[global]
+result_code = PASS
+result_output = Test passed.
+
+[results]
+value0 = 1
+value1 = 2.3456
+
+[micro-tests]
+microtest0 = test0
+microtest1 = test1
+microtest2 = test2
+
+[micro-results]
+micro_value0 = 4
+micro_value1 = 1.3
diff --git a/utils/lit/tests/Inputs/xunit-output/bad&name.ini b/utils/lit/tests/Inputs/xunit-output/bad&name.ini
new file mode 100644
index 000000000000..3234da66aa4f
--- /dev/null
+++ b/utils/lit/tests/Inputs/xunit-output/bad&name.ini
@@ -0,0 +1,7 @@
+[global]
+result_code = FAIL
+result_output = & < > ]]> &"
+
+[results]
+value0 = 1
+value1 = 2.3456
diff --git a/utils/lit/tests/Inputs/xunit-output/dummy_format.py b/utils/lit/tests/Inputs/xunit-output/dummy_format.py
new file mode 100644
index 000000000000..93e48eeb8396
--- /dev/null
+++ b/utils/lit/tests/Inputs/xunit-output/dummy_format.py
@@ -0,0 +1,38 @@
+import os
+try:
+    import ConfigParser
+except ImportError:
+    import configparser as ConfigParser
+
+import lit.formats
+import lit.Test
+
+class DummyFormat(lit.formats.FileBasedTest):
+    def execute(self, test, lit_config):
+        # In this dummy format, expect that each test file is actually just a
+        # .ini format dump of the results to report.
+
+        source_path = test.getSourcePath()
+
+        cfg = ConfigParser.ConfigParser()
+        cfg.read(source_path)
+
+        # Create the basic test result.
+        result_code = cfg.get('global', 'result_code')
+        result_output = cfg.get('global', 'result_output')
+        result = lit.Test.Result(getattr(lit.Test, result_code),
+                                 result_output)
+
+        # Load additional metrics.
+        for key,value_str in cfg.items('results'):
+            value = eval(value_str)
+            if isinstance(value, int):
+                metric = lit.Test.IntMetricValue(value)
+            elif isinstance(value, float):
+                metric = lit.Test.RealMetricValue(value)
+            else:
+                raise RuntimeError("unsupported result type")
+            result.addMetric(key, metric)
+
+        return result
+
diff --git a/utils/lit/tests/Inputs/xunit-output/lit.cfg b/utils/lit/tests/Inputs/xunit-output/lit.cfg
new file mode 100644
index 000000000000..0191cc218884
--- /dev/null
+++ b/utils/lit/tests/Inputs/xunit-output/lit.cfg
@@ -0,0 +1,10 @@
+import site
+site.addsitedir(os.path.dirname(__file__))
+import dummy_format
+
+config.name = 'test-data'
+config.suffixes = ['.ini']
+config.test_format = dummy_format.DummyFormat()
+config.test_source_root = None
+config.test_exec_root = None
+config.target_triple = None
diff --git a/utils/lit/tests/max-failures.py b/utils/lit/tests/max-failures.py
index c86d7b7fbdce..ca107bc29b9f 100644
--- a/utils/lit/tests/max-failures.py
+++ b/utils/lit/tests/max-failures.py
@@ -8,7 +8,7 @@
 #
 # END.
 
-# CHECK: Failing Tests (17)
+# CHECK: Failing Tests (27)
 # CHECK: Failing Tests (1)
 # CHECK: Failing Tests (2)
 # CHECK: error: Setting --max-failures to 0 does not have any effect.
diff --git a/utils/lit/tests/shtest-format.py b/utils/lit/tests/shtest-format.py
index 94d74e3a9200..8c318697195a 100644
--- a/utils/lit/tests/shtest-format.py
+++ b/utils/lit/tests/shtest-format.py
@@ -1,8 +1,10 @@
 # Check the various features of the ShTest format.
 #
-# RUN: not %{lit} -j 1 -v %{inputs}/shtest-format > %t.out
+# RUN: rm -f %t.xml
+# RUN: not %{lit} -j 1 -v %{inputs}/shtest-format --xunit-xml-output %t.xml > %t.out
 # RUN: FileCheck < %t.out %s
-#
+# RUN: FileCheck --check-prefix=XUNIT < %t.xml %s
+
 # END.
 
 # CHECK: -- Testing:
@@ -39,6 +41,7 @@
 #
 # CHECK: Command Output (stdout):
 # CHECK-NEXT: --
+# CHECK-NEXT: $ ":" "RUN: at line 1"
 # CHECK-NEXT: $ "printf"
 # CHECK-NEXT: # command output:
 # CHECK-NEXT: line 1: failed test output on stdout
@@ -83,3 +86,85 @@
 # CHECK: Unresolved Tests   : 3
 # CHECK: Unexpected Passes  : 1
 # CHECK: Unexpected Failures: 3
+
+
+# XUNIT: <?xml version="1.0" encoding="UTF-8" ?>
+# XUNIT-NEXT: <testsuites>
+# XUNIT-NEXT: <testsuite name="shtest-format" tests="23" failures="7" skipped="5">
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="argv0.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.external_shell" name="fail.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+
+# XUNIT: <testcase classname="shtest-format.external_shell" name="fail_with_bad_encoding.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+# XUNIT: <testcase classname="shtest-format.external_shell" name="pass.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="fail.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="no-test-line.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="pass.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-any-missing.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT:<skipped message="Skipping because of: a-missing-feature || a-missing-feature-2" />
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-any-present.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-missing.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT:<skipped message="Skipping because of: a-missing-feature" />
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-present.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-star.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="requires-triple.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT:<skipped message="Skipping because of: x86_64" />
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="unsupported-expr-false.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="unsupported-expr-true.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT:<skipped message="Skipping because of configuration." />
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="unsupported-star.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+# XUNIT: <testcase classname="shtest-format.unsupported_dir" name="some-test.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT:<skipped message="Skipping because of configuration." />
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xfail-expr-false.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xfail-expr-true.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xfail-feature.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xfail-target.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xfail.txt" time="{{[0-9]+\.[0-9]+}}"/>
+
+# XUNIT: <testcase classname="shtest-format.shtest-format" name="xpass.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure{{[ ]*}}>
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
+# XUNIT: </testsuite>
+# XUNIT-NEXT: </testsuites>
diff --git a/utils/lit/tests/shtest-output-printing.py b/utils/lit/tests/shtest-output-printing.py
index 2a85cf975c9a..2344ef200367 100644
--- a/utils/lit/tests/shtest-output-printing.py
+++ b/utils/lit/tests/shtest-output-printing.py
@@ -16,12 +16,15 @@
 #
 # CHECK:      Command Output
 # CHECK-NEXT: --
+# CHECK-NEXT: $ ":" "RUN: at line 1"
 # CHECK-NEXT: $ "true"
+# CHECK-NEXT: $ ":" "RUN: at line 2"
 # CHECK-NEXT: $ "echo" "hi"
 # CHECK-NEXT: # command output:
 # CHECK-NEXT: hi
 #
-# CHECK:      $ "wc" "missing-file"
+# CHECK:      $ ":" "RUN: at line 3"
+# CHECK-NEXT: $ "wc" "missing-file"
 # CHECK-NEXT: # redirected output from '{{.*(/|\\\\)}}basic.txt.tmp.out':
 # CHECK-NEXT: missing-file{{.*}} No such file or directory
 # CHECK:      note: command had no output on stdout or stderr
diff --git a/utils/lit/tests/shtest-run-at-line.py b/utils/lit/tests/shtest-run-at-line.py
new file mode 100644
index 000000000000..cd0e08137ee5
--- /dev/null
+++ b/utils/lit/tests/shtest-run-at-line.py
@@ -0,0 +1,70 @@
+# Check that -vv makes the line number of the failing RUN command clear.
+# (-v is actually sufficient in the case of the internal shell.)
+#
+# RUN: not %{lit} -j 1 -vv %{inputs}/shtest-run-at-line > %t.out
+# RUN: FileCheck --input-file %t.out %s
+#
+# END.
+
+
+# CHECK: Testing: 4 tests
+
+
+# In the case of the external shell, we check for only RUN lines in stderr in
+# case some shell implementations format "set -x" output differently.
+
+# CHECK-LABEL: FAIL: shtest-run-at-line :: external-shell/basic.txt
+
+# CHECK:      Script:
+# CHECK:      RUN: at line 4{{.*}}  true
+# CHECK-NEXT: RUN: at line 5{{.*}}  false
+# CHECK-NEXT: RUN: at line 6{{.*}}  true
+
+# CHECK:     RUN: at line 4
+# CHECK:     RUN: at line 5
+# CHECK-NOT: RUN
+
+# CHECK-LABEL: FAIL: shtest-run-at-line :: external-shell/line-continuation.txt
+
+# CHECK:      Script:
+# CHECK:      RUN: at line 4{{.*}}  echo 'foo bar'  | FileCheck
+# CHECK-NEXT: RUN: at line 6{{.*}}  echo 'foo baz'  | FileCheck
+# CHECK-NEXT: RUN: at line 9{{.*}}  echo 'foo bar'  | FileCheck
+
+# CHECK:     RUN: at line 4
+# CHECK:     RUN: at line 6
+# CHECK-NOT: RUN
+
+
+# CHECK-LABEL: FAIL: shtest-run-at-line :: internal-shell/basic.txt
+
+# CHECK:      Script:
+# CHECK:      : 'RUN: at line 1';  true
+# CHECK-NEXT: : 'RUN: at line 2';  false
+# CHECK-NEXT: : 'RUN: at line 3';  true
+
+# CHECK:      Command Output (stdout)
+# CHECK:      $ ":" "RUN: at line 1"
+# CHECK-NEXT: $ "true"
+# CHECK-NEXT: $ ":" "RUN: at line 2"
+# CHECK-NEXT: $ "false"
+# CHECK-NOT:  RUN
+
+# CHECK-LABEL: FAIL: shtest-run-at-line :: internal-shell/line-continuation.txt
+
+# CHECK:      Script:
+# CHECK:      : 'RUN: at line 1';  : first line continued to second line
+# CHECK-NEXT: : 'RUN: at line 3';  echo 'foo bar'  | FileCheck
+# CHECK-NEXT: : 'RUN: at line 5';  echo  'foo baz'  | FileCheck
+# CHECK-NEXT: : 'RUN: at line 8';  echo 'foo bar'  | FileCheck
+
+# CHECK:      Command Output (stdout)
+# CHECK:      $ ":" "RUN: at line 1"
+# CHECK-NEXT: $ ":" "first" "line" "continued" "to" "second" "line"
+# CHECK-NEXT: $ ":" "RUN: at line 3"
+# CHECK-NEXT: $ "echo" "foo bar"
+# CHECK-NEXT: $ "FileCheck" "{{.*}}"
+# CHECK-NEXT: $ ":" "RUN: at line 5"
+# CHECK-NEXT: $ "echo" "foo baz"
+# CHECK-NEXT: $ "FileCheck" "{{.*}}"
+# CHECK-NOT:  RUN
diff --git a/utils/lit/tests/shtest-shell.py b/utils/lit/tests/shtest-shell.py
index 723842fce040..ed0bdf35fe11 100644
--- a/utils/lit/tests/shtest-shell.py
+++ b/utils/lit/tests/shtest-shell.py
@@ -10,6 +10,29 @@
 
 # CHECK: -- Testing:
 
+# CHECK: FAIL: shtest-shell :: cat-error-0.txt
+# CHECK: *** TEST 'shtest-shell :: cat-error-0.txt' FAILED ***
+# CHECK: $ "cat" "-b" "temp1.txt"
+# CHECK: # command stderr:
+# CHECK: Unsupported: 'cat':  option -b not recognized
+# CHECK: error: command failed with exit status: 1
+# CHECK: ***
+
+# CHECK: FAIL: shtest-shell :: cat-error-1.txt
+# CHECK: *** TEST 'shtest-shell :: cat-error-1.txt' FAILED ***
+# CHECK: $ "cat" "temp1.txt"
+# CHECK: # command stderr:
+# CHECK: [Errno 2] No such file or directory: 'temp1.txt'
+# CHECK: error: command failed with exit status: 1
+# CHECK: ***
+
+# CHECK: FAIL: shtest-shell :: colon-error.txt
+# CHECK: *** TEST 'shtest-shell :: colon-error.txt' FAILED ***
+# CHECK: $ ":"
+# CHECK: # command stderr:
+# CHECK: Unsupported: ':' cannot be part of a pipeline
+# CHECK: error: command failed with exit status: 127
+# CHECK: ***
 
 # CHECK: FAIL: shtest-shell :: diff-error-0.txt
 # CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED ***
@@ -71,6 +94,61 @@
 # CHECK: error: command failed with exit status: 127
 # CHECK: ***
 
+# CHECK: FAIL: shtest-shell :: diff-r-error-0.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-0.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: Only in {{.*}}dir1: dir1unique
+# CHECK: Only in {{.*}}dir2: dir2unique
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-1.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-1.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: *** {{.*}}dir1{{.*}}subdir{{.*}}f01
+# CHECK: --- {{.*}}dir2{{.*}}subdir{{.*}}f01
+# CHECK: 12345
+# CHECK: 00000
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-2.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-2.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: Only in {{.*}}dir2: extrafile
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-3.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-3.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: Only in {{.*}}dir1: extra_subdir
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-4.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-4.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: File {{.*}}dir1{{.*}}extra_subdir is a directory while file {{.*}}dir2{{.*}}extra_subdir is a regular file
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-5.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-5.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: Only in {{.*}}dir1: extra_subdir
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-shell :: diff-r-error-6.txt
+# CHECK: *** TEST 'shtest-shell :: diff-r-error-6.txt' FAILED ***
+# CHECK: $ "diff" "-r"
+# CHECK: # command output:
+# CHECK: File {{.*}}dir1{{.*}}extra_file is a regular empty file while file {{.*}}dir2{{.*}}extra_file is a directory
+# CHECK: error: command failed with exit status: 1
+
+# CHECK: PASS: shtest-shell :: diff-r.txt
+
 # CHECK: FAIL: shtest-shell :: error-0.txt
 # CHECK: *** TEST 'shtest-shell :: error-0.txt' FAILED ***
 # CHECK: $ "not-a-real-command"
@@ -83,7 +161,7 @@
 #
 # CHECK: FAIL: shtest-shell :: error-1.txt
 # CHECK: *** TEST 'shtest-shell :: error-1.txt' FAILED ***
-# CHECK: shell parser error on: 'echo "missing quote'
+# CHECK: shell parser error on: ': \'RUN: at line 3\'; echo "missing quote'
 # CHECK: ***
 
 # CHECK: FAIL: shtest-shell :: error-2.txt
@@ -149,4 +227,4 @@
 # CHECK: PASS: shtest-shell :: sequencing-0.txt
 # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
 # CHECK: PASS: shtest-shell :: valid-shell.txt
-# CHECK: Failing Tests (17)
+# CHECK: Failing Tests (27)
diff --git a/utils/lit/tests/shtest-timeout.py b/utils/lit/tests/shtest-timeout.py
index 879850065908..2f1fc3d17c01 100644
--- a/utils/lit/tests/shtest-timeout.py
+++ b/utils/lit/tests/shtest-timeout.py
@@ -3,13 +3,16 @@
 # PR33944
 # XFAIL: windows
 
+# FIXME: This test is fragile because it relies on time which can
+# be affected by system performance. In particular we are currently
+# assuming that `short.py` can be successfully executed within 2
+# seconds of wallclock time.
+
 # Test per test timeout using external shell
 # RUN: not %{lit} \
 # RUN: %{inputs}/shtest-timeout/infinite_loop.py \
-# RUN: %{inputs}/shtest-timeout/quick_then_slow.py \
 # RUN: %{inputs}/shtest-timeout/short.py \
-# RUN: %{inputs}/shtest-timeout/slow.py \
-# RUN: -j 1 -v --debug --timeout 1 --param external=1 > %t.extsh.out 2> %t.extsh.err
+# RUN: -j 1 -v --debug --timeout 2 --param external=1 > %t.extsh.out 2> %t.extsh.err
 # RUN: FileCheck --check-prefix=CHECK-OUT-COMMON < %t.extsh.out %s
 # RUN: FileCheck --check-prefix=CHECK-EXTSH-ERR < %t.extsh.err %s
 #
@@ -18,32 +21,14 @@
 # Test per test timeout using internal shell
 # RUN: not %{lit} \
 # RUN: %{inputs}/shtest-timeout/infinite_loop.py \
-# RUN: %{inputs}/shtest-timeout/quick_then_slow.py \
 # RUN: %{inputs}/shtest-timeout/short.py \
-# RUN: %{inputs}/shtest-timeout/slow.py \
-# RUN: -j 1 -v --debug --timeout 1 --param external=0 > %t.intsh.out 2> %t.intsh.err
+# RUN: -j 1 -v --debug --timeout 2 --param external=0 > %t.intsh.out 2> %t.intsh.err
 # RUN: FileCheck  --check-prefix=CHECK-OUT-COMMON < %t.intsh.out %s
 # RUN: FileCheck --check-prefix=CHECK-INTSH-OUT < %t.intsh.out %s
 # RUN: FileCheck --check-prefix=CHECK-INTSH-ERR < %t.intsh.err %s
 
 # CHECK-INTSH-OUT: TIMEOUT: per_test_timeout :: infinite_loop.py
 # CHECK-INTSH-OUT: command output:
-# CHECK-INTSH-OUT-NEXT: Running infinite loop
-# CHECK-INTSH-OUT: command reached timeout: True
-
-# CHECK-INTSH-OUT: TIMEOUT: per_test_timeout :: quick_then_slow.py
-# CHECK-INTSH-OUT: Timeout: Reached timeout of 1 seconds
-# CHECK-INTSH-OUT: Command Output
-# CHECK-INTSH-OUT: command output:
-# CHECK-INTSH-OUT-NEXT: Running in quick mode
-# CHECK-INTSH-OUT: command reached timeout: False
-# CHECK-INTSH-OUT: command output:
-# CHECK-INTSH-OUT-NEXT: Running in slow mode
-# CHECK-INTSH-OUT: command reached timeout: True
-
-# CHECK-INTSH-OUT: TIMEOUT: per_test_timeout :: slow.py
-# CHECK-INTSH-OUT: command output:
-# CHECK-INTSH-OUT-NEXT: Running slow program
 # CHECK-INTSH-OUT: command reached timeout: True
 
 # CHECK-INTSH-ERR: Using internal shell
@@ -51,44 +36,28 @@
 # Test per test timeout set via a config file rather than on the command line
 # RUN: not %{lit} \
 # RUN: %{inputs}/shtest-timeout/infinite_loop.py \
-# RUN: %{inputs}/shtest-timeout/quick_then_slow.py \
 # RUN: %{inputs}/shtest-timeout/short.py \
-# RUN: %{inputs}/shtest-timeout/slow.py \
 # RUN: -j 1 -v --debug --param external=0 \
-# RUN: --param set_timeout=1 > %t.cfgset.out 2> %t.cfgset.err
+# RUN: --param set_timeout=2 > %t.cfgset.out 2> %t.cfgset.err
 # RUN: FileCheck --check-prefix=CHECK-OUT-COMMON  < %t.cfgset.out %s
 # RUN: FileCheck --check-prefix=CHECK-CFGSET-ERR < %t.cfgset.err %s
 #
 # CHECK-CFGSET-ERR: Using internal shell
 
 # CHECK-OUT-COMMON: TIMEOUT: per_test_timeout :: infinite_loop.py
-# CHECK-OUT-COMMON: Timeout: Reached timeout of 1 seconds
-# CHECK-OUT-COMMON: Command {{([0-9]+ )?}}Output
-# CHECK-OUT-COMMON: Running infinite loop
-
-# CHECK-OUT-COMMON: TIMEOUT: per_test_timeout :: quick_then_slow.py
-# CHECK-OUT-COMMON: Timeout: Reached timeout of 1 seconds
+# CHECK-OUT-COMMON: Timeout: Reached timeout of 2 seconds
 # CHECK-OUT-COMMON: Command {{([0-9]+ )?}}Output
-# CHECK-OUT-COMMON: Running in quick mode
-# CHECK-OUT-COMMON: Running in slow mode
 
 # CHECK-OUT-COMMON: PASS: per_test_timeout :: short.py
 
-# CHECK-OUT-COMMON: TIMEOUT: per_test_timeout :: slow.py
-# CHECK-OUT-COMMON: Timeout: Reached timeout of 1 seconds
-# CHECK-OUT-COMMON: Command {{([0-9]+ )?}}Output
-# CHECK-OUT-COMMON: Running slow program
-
 # CHECK-OUT-COMMON: Expected Passes{{ *}}: 1
-# CHECK-OUT-COMMON: Individual Timeouts{{ *}}: 3
+# CHECK-OUT-COMMON: Individual Timeouts{{ *}}: 1
 
 # Test per test timeout via a config file and on the command line.
 # The value set on the command line should override the config file.
 # RUN: not %{lit} \
 # RUN: %{inputs}/shtest-timeout/infinite_loop.py \
-# RUN: %{inputs}/shtest-timeout/quick_then_slow.py \
 # RUN: %{inputs}/shtest-timeout/short.py \
-# RUN: %{inputs}/shtest-timeout/slow.py \
 # RUN: -j 1 -v --debug --param external=0 \
 # RUN: --param set_timeout=1 --timeout=2 > %t.cmdover.out 2> %t.cmdover.err
 # RUN: FileCheck --check-prefix=CHECK-CMDLINE-OVERRIDE-OUT  < %t.cmdover.out %s
@@ -99,20 +68,8 @@
 # CHECK-CMDLINE-OVERRIDE-OUT: TIMEOUT: per_test_timeout :: infinite_loop.py
 # CHECK-CMDLINE-OVERRIDE-OUT: Timeout: Reached timeout of 2 seconds
 # CHECK-CMDLINE-OVERRIDE-OUT: Command {{([0-9]+ )?}}Output
-# CHECK-CMDLINE-OVERRIDE-OUT: Running infinite loop
-
-# CHECK-CMDLINE-OVERRIDE-OUT: TIMEOUT: per_test_timeout :: quick_then_slow.py
-# CHECK-CMDLINE-OVERRIDE-OUT: Timeout: Reached timeout of 2 seconds
-# CHECK-CMDLINE-OVERRIDE-OUT: Command {{([0-9]+ )?}}Output
-# CHECK-CMDLINE-OVERRIDE-OUT: Running in quick mode
-# CHECK-CMDLINE-OVERRIDE-OUT: Running in slow mode
 
 # CHECK-CMDLINE-OVERRIDE-OUT: PASS: per_test_timeout :: short.py
 
-# CHECK-CMDLINE-OVERRIDE-OUT: TIMEOUT: per_test_timeout :: slow.py
-# CHECK-CMDLINE-OVERRIDE-OUT: Timeout: Reached timeout of 2 seconds
-# CHECK-CMDLINE-OVERRIDE-OUT: Command {{([0-9]+ )?}}Output
-# CHECK-CMDLINE-OVERRIDE-OUT: Running slow program
-
 # CHECK-CMDLINE-OVERRIDE-OUT: Expected Passes{{ *}}: 1
-# CHECK-CMDLINE-OVERRIDE-OUT: Individual Timeouts{{ *}}: 3
+# CHECK-CMDLINE-OVERRIDE-OUT: Individual Timeouts{{ *}}: 1
diff --git a/utils/lit/tests/test-data-micro.py b/utils/lit/tests/test-data-micro.py
new file mode 100644
index 000000000000..634139e233f9
--- /dev/null
+++ b/utils/lit/tests/test-data-micro.py
@@ -0,0 +1,21 @@
+# Test features related to formats which support reporting additional test data
+# and multiple test results.
+
+# RUN: %{lit} -j 1 -v %{inputs}/test-data-micro | FileCheck %s
+
+# CHECK: -- Testing:
+
+# CHECK: PASS: test-data-micro :: micro-tests.ini
+# CHECK-NEXT: *** TEST 'test-data-micro :: micro-tests.ini' RESULTS ***
+# CHECK-NEXT: value0: 1
+# CHECK-NEXT: value1: 2.3456
+# CHECK-NEXT: ***
+# CHECK-NEXT: *** MICRO-TEST: test0
+# CHECK-NEXT: micro_value0: 4
+# CHECK-NEXT: micro_value1: 1.3
+# CHECK-NEXT: *** MICRO-TEST: test1
+# CHECK-NEXT: micro_value0: 4
+# CHECK-NEXT: micro_value1: 1.3
+# CHECK-NEXT: *** MICRO-TEST: test2
+# CHECK-NEXT: micro_value0: 4
+# CHECK-NEXT: micro_value1: 1.3
diff --git a/utils/lit/tests/test-output-micro.py b/utils/lit/tests/test-output-micro.py
new file mode 100644
index 000000000000..4357fe88f905
--- /dev/null
+++ b/utils/lit/tests/test-output-micro.py
@@ -0,0 +1,51 @@
+# RUN: %{lit} -j 1 -v %{inputs}/test-data-micro --output %t.results.out
+# RUN: FileCheck < %t.results.out %s
+# RUN: rm %t.results.out
+
+
+# CHECK: {
+# CHECK: "__version__"
+# CHECK: "elapsed"
+# CHECK-NEXT: "tests": [
+# CHECK-NEXT:   {
+# CHECK-NEXT:     "code": "PASS",
+# CHECK-NEXT:     "elapsed": null,
+# CHECK-NEXT:     "metrics": {
+# CHECK-NEXT:       "micro_value0": 4,
+# CHECK-NEXT:       "micro_value1": 1.3
+# CHECK-NEXT:     },
+# CHECK-NEXT:     "name": "test-data-micro :: micro-tests.ini:test{{[0-2]}}",
+# CHECK-NEXT:     "output": ""
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:     "code": "PASS",
+# CHECK-NEXT:     "elapsed": null,
+# CHECK-NEXT:     "metrics": {
+# CHECK-NEXT:       "micro_value0": 4,
+# CHECK-NEXT:       "micro_value1": 1.3
+# CHECK-NEXT:     },
+# CHECK-NEXT:     "name": "test-data-micro :: micro-tests.ini:test{{[0-2]}}",
+# CHECK-NEXT:     "output": ""
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:     "code": "PASS",
+# CHECK-NEXT:     "elapsed": null,
+# CHECK-NEXT:     "metrics": {
+# CHECK-NEXT:       "micro_value0": 4,
+# CHECK-NEXT:       "micro_value1": 1.3
+# CHECK-NEXT:     },
+# CHECK-NEXT:     "name": "test-data-micro :: micro-tests.ini:test{{[0-2]}}",
+# CHECK-NEXT:     "output": ""
+# CHECK-NEXT:   },
+# CHECK-NEXT:   {
+# CHECK-NEXT:     "code": "PASS",
+# CHECK-NEXT:     "elapsed": {{[0-9.]+}},
+# CHECK-NEXT:     "metrics": {
+# CHECK-NEXT:       "value0": 1,
+# CHECK-NEXT:       "value1": 2.3456
+# CHECK-NEXT:     },
+# CHECK-NEXT:     "name": "test-data-micro :: micro-tests.ini",
+# CHECK-NEXT:     "output": "Test passed."
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
diff --git a/utils/lit/tests/unit/TestRunner.py b/utils/lit/tests/unit/TestRunner.py
index 874bf275d4ea..89209d80f555 100644
--- a/utils/lit/tests/unit/TestRunner.py
+++ b/utils/lit/tests/unit/TestRunner.py
@@ -99,8 +99,8 @@ class TestIntegratedTestKeywordParser(unittest.TestCase):
         cmd_parser = self.get_parser(parsers, 'MY_RUN:')
         value = cmd_parser.getValue()
         self.assertEqual(len(value), 2)  # there are only two run lines
-        self.assertEqual(value[0].strip(), 'baz')
-        self.assertEqual(value[1].strip(), 'foo  bar')
+        self.assertEqual(value[0].strip(), "%dbg(MY_RUN: at line 4)  baz")
+        self.assertEqual(value[1].strip(), "%dbg(MY_RUN: at line 7)  foo  bar")
 
     def test_custom(self):
         parsers = self.make_parsers()
diff --git a/utils/lit/tests/xunit-output.py b/utils/lit/tests/xunit-output.py
index 3f4939536379..930768e61dae 100644
--- a/utils/lit/tests/xunit-output.py
+++ b/utils/lit/tests/xunit-output.py
@@ -1,10 +1,16 @@
+# REQUIRES: shell
+
 # Check xunit output
-# RUN: %{lit} --xunit-xml-output %t.xunit.xml %{inputs}/test-data
+# RUN: rm -rf %t.xunit.xml
+# RUN: not %{lit} --xunit-xml-output %t.xunit.xml %{inputs}/xunit-output
+# If xmllint is installed verify that the generated xml is well-formed
+# RUN: sh -c 'if command -v xmllint 2>/dev/null; then xmllint --noout %t.xunit.xml; fi'
 # RUN: FileCheck < %t.xunit.xml %s
 
 # CHECK: <?xml version="1.0" encoding="UTF-8" ?>
 # CHECK: <testsuites>
-# CHECK: <testsuite name='test-data' tests='1' failures='0'>
-# CHECK: <testcase classname='test-data.test-data' name='metrics.ini' time='0.{{[0-9]+}}'/>
+# CHECK: <testsuite name="test-data" tests="1" failures="1" skipped="0">
+# CHECK: <testcase classname="test-data.test-data" name="bad&amp;name.ini" time="{{[0-1]}}.{{[0-9]+}}">
+# CHECK-NEXT: <failure ><![CDATA[& < > ]]]]><![CDATA[> &"]]></failure>
 # CHECK: </testsuite>
 # CHECK: </testsuites>
diff --git a/utils/lldbDataFormatters.py b/utils/lldbDataFormatters.py
index 687729f61edd..db1e22af792e 100644
--- a/utils/lldbDataFormatters.py
+++ b/utils/lldbDataFormatters.py
@@ -91,8 +91,18 @@ class ArrayRefSynthProvider:
         assert self.type_size != 0
 
 def OptionalSummaryProvider(valobj, internal_dict):
-    if not valobj.GetChildMemberWithName('hasVal').GetValueAsUnsigned(0):
+    storage = valobj.GetChildMemberWithName('Storage')
+    if not storage:
+        storage = valobj
+
+    failure = 2
+    hasVal = storage.GetChildMemberWithName('hasVal').GetValueAsUnsigned(failure)
+    if hasVal == failure:
+        return '<could not read llvm::Optional>'
+
+    if hasVal == 0:
         return 'None'
-    underlying_type = valobj.GetType().GetTemplateArgumentType(0)
-    storage = valobj.GetChildMemberWithName('storage')
+
+    underlying_type = storage.GetType().GetTemplateArgumentType(0)
+    storage = storage.GetChildMemberWithName('storage')
     return str(storage.Cast(underlying_type))
diff --git a/utils/llvm-build/llvmbuild/configutil.py b/utils/llvm-build/llvmbuild/configutil.py
deleted file mode 100644
index b5582c34de46..000000000000
--- a/utils/llvm-build/llvmbuild/configutil.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""
-Defines utilities useful for performing standard "configuration" style tasks.
-"""
-
-import re
-import os
-
-def configure_file(input_path, output_path, substitutions):
-    """configure_file(input_path, output_path, substitutions) -> bool
-
-    Given an input and output path, "configure" the file at the given input path
-    by replacing variables in the file with those given in the substitutions
-    list. Returns true if the output file was written.
-
-    The substitutions list should be given as a list of tuples (regex string,
-    replacement), where the regex and replacement will be used as in 're.sub' to
-    execute the variable replacement.
-
-    The output path's parent directory need not exist (it will be created).
-
-    If the output path does exist and the configured data is not different than
-    it's current contents, the output file will not be modified. This is
-    designed to limit the impact of configured files on build dependencies.
-    """
-
-    # Read in the input data.
-    f = open(input_path, "rb")
-    try:
-        data = f.read()
-    finally:
-        f.close()
-
-    # Perform the substitutions.
-    for regex_string,replacement in substitutions:
-        regex = re.compile(regex_string)
-        data = regex.sub(replacement, data)
-
-    # Ensure the output parent directory exists.
-    output_parent_path = os.path.dirname(os.path.abspath(output_path))
-    if not os.path.exists(output_parent_path):
-        os.makedirs(output_parent_path)
-
-    # If the output path exists, load it and compare to the configured contents.
-    if os.path.exists(output_path):
-        current_data = None
-        try:
-            f = open(output_path, "rb")
-            try:
-                current_data = f.read()
-            except:
-                current_data = None
-            f.close()
-        except:
-            current_data = None
-
-        if current_data is not None and current_data == data:
-            return False
-
-    # Write the output contents.
-    f = open(output_path, "wb")
-    try:
-        f.write(data)
-    finally:
-        f.close()
-
-    return True
diff --git a/utils/llvm-build/llvmbuild/main.py b/utils/llvm-build/llvmbuild/main.py
index fccfc7e6ece3..4533c6506309 100644
--- a/utils/llvm-build/llvmbuild/main.py
+++ b/utils/llvm-build/llvmbuild/main.py
@@ -4,7 +4,6 @@ import os
 import sys
 
 import llvmbuild.componentinfo as componentinfo
-import llvmbuild.configutil as configutil
 
 from llvmbuild.util import fatal, note
 
@@ -38,17 +37,6 @@ def cmake_quote_path(value):
 
     return value
 
-def mk_quote_string_for_target(value):
-    """
-    mk_quote_string_for_target(target_name) -> str
-
-    Return a quoted form of the given target_name suitable for including in a
-    Makefile as a target name.
-    """
-
-    # The only quoting we currently perform is for ':', to support msys users.
-    return value.replace(":", "\\:")
-
 def make_install_dir(path):
     """
     make_install_dir(path) -> None
@@ -387,7 +375,7 @@ subdirectories = %s
         f.write("""\
 //===- llvm-build generated file --------------------------------*- C++ -*-===//
 //
-// Component Library Depenedency Table
+// Component Library Dependency Table
 //
 // Automatically generated file, do not edit!
 //
@@ -651,87 +639,6 @@ set_property(TARGET %s PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES %s)\n""" % (
 
         f.close()
 
-    def write_make_fragment(self, output_path, enabled_optional_components):
-        """
-        write_make_fragment(output_path) -> None
-
-        Generate a Makefile fragment which includes all of the collated
-        LLVMBuild information in a format that is easily digestible by a
-        Makefile. The exact contents of this are closely tied to how the LLVM
-        Makefiles integrate LLVMBuild, see Makefile.rules in the top-level.
-        """
-
-        dependencies = list(self.get_fragment_dependencies())
-
-        # Write out the Makefile fragment.
-        make_install_dir(os.path.dirname(output_path))
-        f = open(output_path, 'w')
-
-        # Write the header.
-        header_fmt = '\
-#===-- %s - LLVMBuild Configuration for LLVM %s-*- Makefile -*--===#'
-        header_name = os.path.basename(output_path)
-        header_pad = '-' * (80 - len(header_fmt % (header_name, '')))
-        header_string = header_fmt % (header_name, header_pad)
-        f.write("""\
-%s
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-#
-# This file contains the LLVMBuild project information in a format easily
-# consumed by the Makefile based build system.
-#
-# This file is autogenerated by llvm-build, do not edit!
-#
-#===------------------------------------------------------------------------===#
-
-""" % header_string)
-
-        # Write the dependencies for the fragment.
-        #
-        # FIXME: Technically, we need to properly quote for Make here.
-        f.write("""\
-# Clients must explicitly enable LLVMBUILD_INCLUDE_DEPENDENCIES to get
-# these dependencies. This is a compromise to help improve the
-# performance of recursive Make systems.
-""")
-        f.write('ifeq ($(LLVMBUILD_INCLUDE_DEPENDENCIES),1)\n')
-        f.write("# The dependencies for this Makefile fragment itself.\n")
-        f.write("%s: \\\n" % (mk_quote_string_for_target(output_path),))
-        for dep in dependencies:
-            f.write("\t%s \\\n" % (dep,))
-        f.write('\n')
-
-        # Generate dummy rules for each of the dependencies, so that things
-        # continue to work correctly if any of those files are moved or removed.
-        f.write("""\
-# The dummy targets to allow proper regeneration even when files are moved or
-# removed.
-""")
-        for dep in dependencies:
-            f.write("%s:\n" % (mk_quote_string_for_target(dep),))
-        f.write('endif\n')
-
-        f.write("""
-# List of libraries to be exported for use by applications.
-# See 'cmake/modules/Makefile'.
-LLVM_LIBS_TO_EXPORT :=""")
-        self.foreach_cmake_library(
-            lambda ci:
-                f.write(' \\\n  %s' % ci.get_prefixed_library_name())
-            ,
-            enabled_optional_components,
-            skip_disabled = True,
-            skip_not_installed = True # Do not export internal libraries like gtest
-            )
-        f.write('\n')
-        f.close()
-
 def add_magic_target_components(parser, project, opts):
     """add_magic_target_components(project, opts) -> None
 
@@ -853,9 +760,6 @@ def main():
                      help=(
             "If given, an alternate path to search for LLVMBuild.txt files"),
                      action="store", default=None, metavar="PATH")
-    group.add_option("", "--build-root", dest="build_root", metavar="PATH",
-                      help="Path to the build directory (if needed) [%default]",
-                      action="store", default=None)
     parser.add_option_group(group)
 
     group = OptionGroup(parser, "Output Options")
@@ -877,18 +781,6 @@ def main():
                      dest="write_cmake_exports_fragment", metavar="PATH",
                      help="Write the CMake exports information to PATH",
                      action="store", default=None)
-    group.add_option("", "--write-make-fragment",
-                      dest="write_make_fragment", metavar="PATH",
-                     help="Write the Makefile project information to PATH",
-                     action="store", default=None)
-    group.add_option("", "--configure-target-def-file",
-                     dest="configure_target_def_files",
-                     help="""Configure the given file at SUBPATH (relative to
-the inferred or given source root, and with a '.in' suffix) by replacing certain
-substitution variables with lists of targets that support certain features (for
-example, targets with AsmPrinters) and write the result to the build root (as
-given by --build-root) at the same SUBPATH""",
-                     metavar="SUBPATH", action="append", default=None)
     parser.add_option_group(group)
 
     group = OptionGroup(parser, "Configuration Options")
@@ -951,11 +843,6 @@ given by --build-root) at the same SUBPATH""",
         project_info.write_library_table(opts.write_library_table,
                                          opts.optional_components)
 
-    # Write out the make fragment, if requested.
-    if opts.write_make_fragment:
-        project_info.write_make_fragment(opts.write_make_fragment,
-                                         opts.optional_components)
-
     # Write out the cmake fragment, if requested.
     if opts.write_cmake_fragment:
         project_info.write_cmake_fragment(opts.write_cmake_fragment,
@@ -964,40 +851,5 @@ given by --build-root) at the same SUBPATH""",
         project_info.write_cmake_exports_fragment(opts.write_cmake_exports_fragment,
                                                   opts.optional_components)
 
-    # Configure target definition files, if requested.
-    if opts.configure_target_def_files:
-        # Verify we were given a build root.
-        if not opts.build_root:
-            parser.error("must specify --build-root when using "
-                         "--configure-target-def-file")
-
-        # Create the substitution list.
-        available_targets = [ci for ci in project_info.component_infos
-                             if ci.type_name == 'TargetGroup']
-        substitutions = [
-            ("@LLVM_ENUM_TARGETS@",
-             ' '.join('LLVM_TARGET(%s)' % ci.name
-                      for ci in available_targets)),
-            ("@LLVM_ENUM_ASM_PRINTERS@",
-             ' '.join('LLVM_ASM_PRINTER(%s)' % ci.name
-                      for ci in available_targets
-                      if ci.has_asmprinter)),
-            ("@LLVM_ENUM_ASM_PARSERS@",
-             ' '.join('LLVM_ASM_PARSER(%s)' % ci.name
-                      for ci in available_targets
-                      if ci.has_asmparser)),
-            ("@LLVM_ENUM_DISASSEMBLERS@",
-             ' '.join('LLVM_DISASSEMBLER(%s)' % ci.name
-                      for ci in available_targets
-                      if ci.has_disassembler))]
-
-        # Configure the given files.
-        for subpath in opts.configure_target_def_files:
-            inpath = os.path.join(source_root, subpath + '.in')
-            outpath = os.path.join(opts.build_root, subpath)
-            result = configutil.configure_file(inpath, outpath, substitutions)
-            if not result:
-                note("configured file %r hasn't changed" % outpath)
-
 if __name__=='__main__':
     main()
diff --git a/utils/makellvm b/utils/makellvm
deleted file mode 100755
index ae77712941a2..000000000000
--- a/utils/makellvm
+++ /dev/null
@@ -1,145 +0,0 @@
-#!/bin/csh -f
-
-set pstatus = 0
-onintr cleanup
-alias usage 'echo "USAGE: $0:t [-h] [-n] [-obj obj-root] [gmake-flags] [VAR=...] [toolname (default: opt)]"; set pstatus = 1; goto cleanup'
-
-set EXEC = opt
-set GMAKE_OPTS = ""
-set DEBUG = 0
-
-## Search path for automatically finding the obj-root to use.
-## Note: The src root directory ${LLVMDIR} will be prepended to this path later.
-## 
-set OBJROOTDIRLIST = ( )
-
-set doit = 1
-unset options_done
-while ( !( $?options_done ) && ($#argv > 0))
-    switch ($argv[1])
-	case -h :
-	    usage
-	case -f :
-	    if ($#argv < 2) usage
-	    shift argv; set MFILE = $argv[1]; shift argv; breaksw
-	case -n :
-	    set doit = 0; shift argv; breaksw
-	case -obj :
-	    set OBJROOT = $argv[2]; shift argv; shift argv
-	    if (! -d "$OBJROOT") then
-		echo "FATAL: Illegal obj-root directory ${OBJROOT}"
-		exit 1
-	    endif
-	    breaksw
-	case -d :
-	    set doit = 0; set DEBUG = 1; shift argv; breaksw
-	case -* :
-	    set GMAKE_OPTS = ( $GMAKE_OPTS $argv[1] ); shift argv; breaksw
-	default :
-	    set optarg = `echo -n $argv[1] | sed 's/^[^=]*$//'`
-	    if ($#optarg) then
-	        set GMAKE_OPTS = ( $GMAKE_OPTS $optarg )
-	        shift argv
-	    else
-	        set options_done
-	    endif
-            breaksw
-    endsw
-end
-
-if ($#argv > 1) then
-    echo 'ERROR: More than one tool is not supported by "makellvm"'
-    usage
-endif
-if ($#argv > 0) then
-    set EXEC = $argv[1]
-endif
-if ($DEBUG) then
-    echo "DEBUG: EXEC = $EXEC"
-endif
-
-## Compute LLVMDIR: the root of the current LLVM tree.
-## It is recorded in the variable LEVEL in Makefile, to compute it
-## 
-if (! $?MFILE) then
-    if (-f GNUmakefile) then
-	set MFILE = GNUmakefile
-    else if (-f makefile) then
-	set MFILE = makefile
-    else
-	set MFILE = Makefile
-    endif
-endif
-if ($DEBUG) then
-    echo "DEBUG: MFILE = $MFILE"
-endif
-if (! -f $MFILE) then
-    echo "Missing or invalid makefile: $MFILE"
-    exit 1
-endif
-
-set LLVMDIR = `awk '/LEVEL[ 	]*=/ {print $NF}' $MFILE`
-if ($DEBUG) then
-    echo "DEBUG: LLVMDIR = $LLVMDIR"
-endif
-
-if ($#LLVMDIR == 0 || ! -d "$LLVMDIR") then
-    echo "Unable to find LLVM src-root directory or directory is invalid."
-    echo "Are you within a valid LLVM directory for running gmake?"
-    exit 1
-endif
-
-## Try to determine the obj-root directory automatically if not specified
-## 
-set OBJROOTDIRLIST = ( ${LLVMDIR} $OBJROOTDIRLIST )	## add src dir
-if ($?OBJROOT == 0) then
-    ## Try to determine object root directory by looking for Makefile.config
-    foreach objdir ( $OBJROOTDIRLIST )
-	if (-f "${objdir}/Makefile.config") then
-	    set OBJROOT = ${objdir}
-            break
-        endif
-    end
-    if ($?OBJROOT == 0) then
-	echo "FATAL: Could not choose an obj-root directory from these choices:"
-	echo "       ${OBJROOTDIRLIST}."
-	echo "       You can specify it explicitly using '-obj obj-root'."
-	exit 1
-    endif
-    echo "Using OBJ-ROOT = ${OBJROOT} (specify '-obj obj-root' to override)."
-endif
-if (${OBJROOT} == ${LLVMDIR}) then
-    # run make in the source directory itself
-    set BUILDROOT = .
-else
-    # run make in the in the obj-root tree, in the directory for $cwd
-    set SRCROOT = `sh -c "cd $LLVMDIR; pwd | sed 's/\//\\\//g'"` 
-    set CURSRCDIR = `echo $cwd | sed -e "s/${SRCROOT}//"`
-    set BUILDROOT = ${OBJROOT}/${CURSRCDIR}
-    unset SRCROOT CURSRCDIR
-endif
-if ($DEBUG) then
-    echo "DEBUG: BUILDROOT = $BUILDROOT"
-endif
-if (! -d $BUILDROOT) then
-    echo "FATAL: Invalid build directory: ${BUILDROOT}"
-    exit 1
-endif
-cd $BUILDROOT 
-
-set CMD = "make $GMAKE_OPTS && (cd $LLVMDIR/tools/$EXEC && make $GMAKE_OPTS)"
-
-if ($doit == 1) then
-    csh -f -c "$CMD"
-    set pstatus = $?
-else
-    echo '(NOT EXECUTING) COMMAND:'
-    echo "  $CMD"
-endif
-
-
-#=========================================================
-# CODE TO BE EXECUTED IF INTERRUPT IS RECEIVED
-#=========================================================
-cleanup:
-    exit($pstatus)
diff --git a/utils/not/not.cpp b/utils/not/not.cpp
index de71b4c68878..f48079d8875a 100644
--- a/utils/not/not.cpp
+++ b/utils/not/not.cpp
@@ -38,8 +38,12 @@ int main(int argc, const char **argv) {
     return 1;
   }
 
+  std::vector<StringRef> Argv;
+  Argv.reserve(argc);
+  for (int i = 0; i < argc; ++i)
+    Argv.push_back(argv[i]);
   std::string ErrMsg;
-  int Result = sys::ExecuteAndWait(*Program, argv, nullptr, {}, 0, 0, &ErrMsg);
+  int Result = sys::ExecuteAndWait(*Program, Argv, None, {}, 0, 0, &ErrMsg);
 #ifdef _WIN32
   // Handle abort() in msvcrt -- It has exit code as 3.  abort(), aka
   // unreachable, should be recognized as a crash.  However, some binaries use
diff --git a/utils/release/build_llvm_package.bat b/utils/release/build_llvm_package.bat
index ef9b21ce923a..f39a2fa73f9a 100755
--- a/utils/release/build_llvm_package.bat
+++ b/utils/release/build_llvm_package.bat
@@ -8,26 +8,19 @@ REM Usage: build_llvm_package.bat <revision>
 
 REM Prerequisites:
 REM
-REM   Visual Studio 2017, CMake, Ninja, SVN, GNUWin32, SWIG, Python 3,
+REM   Visual Studio 2017, CMake, Ninja, SVN, GNUWin32,
 REM   NSIS with the strlen_8192 patch,
 REM   Visual Studio 2017 SDK and Nuget (for the clang-format plugin),
 REM   Perl (for the OpenMP run-time).
-REM
-REM
-REM   For LLDB, SWIG version <= 3.0.8 needs to be used to work around
-REM   https://github.com/swig/swig/issues/769
 
 
 REM You need to modify the paths below:
 set vsdevcmd=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\Common7\Tools\VsDevCmd.bat
 
-set python32_dir=C:\Users\%USER%\AppData\Local\Programs\Python\Python35-32
-set python64_dir=C:\Users\%USER%\AppData\Local\Programs\Python\Python35
-
 set revision=%1
 set branch=trunk
-set package_version=6.0.0-r%revision%
-set clang_format_vs_version=6.0.0.%revision%
+set package_version=7.0.0-r%revision%
+set clang_format_vs_version=7.0.0.%revision%
 set build_dir=llvm_package_%revision%
 
 echo Branch: %branch%
@@ -47,22 +40,23 @@ svn.exe export -r %revision% http://llvm.org/svn/llvm-project/cfe/%branch% llvm/
 svn.exe export -r %revision% http://llvm.org/svn/llvm-project/clang-tools-extra/%branch% llvm/tools/clang/tools/extra || exit /b
 svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lld/%branch% llvm/tools/lld || exit /b
 svn.exe export -r %revision% http://llvm.org/svn/llvm-project/compiler-rt/%branch% llvm/projects/compiler-rt || exit /b
-svn.exe export -r %revision% http://llvm.org/svn/llvm-project/openmp/%branch% llvm/projects/openmp || exit /b
-svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm/tools/lldb || exit /b
+REM svn.exe export -r %revision% http://llvm.org/svn/llvm-project/openmp/%branch% llvm/projects/openmp || exit /b
 
 
 REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
-set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCMAKE_INSTALL_UCRT_LIBRARIES=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
+set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_USE_CRT_RELEASE=MT -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
 
 REM TODO: Run all tests, including lld and compiler-rt.
 
+set "VSCMD_START_DIR=%CD%"
 call "%vsdevcmd%" -arch=x86
 set CC=
 set CXX=
 mkdir build32_stage0
 cd build32_stage0
-cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
-ninja all || exit /b
+REM Work around VS2017 bug by using MinSizeRel.
+cmake -GNinja %cmake_flags% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
+ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
 ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
 cd..
@@ -71,8 +65,8 @@ mkdir build32
 cd build32
 set CC=..\build32_stage0\bin\clang-cl
 set CXX=..\build32_stage0\bin\clang-cl
-cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
-ninja all || exit /b
+cmake -GNinja %cmake_flags% ..\llvm || exit /b
+ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
 ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
 ninja package || exit /b
@@ -83,19 +77,21 @@ mkdir build_vsix
 cd build_vsix
 set CC=..\build32_stage0\bin\clang-cl
 set CXX=..\build32_stage0\bin\clang-cl
-cmake -GNinja %cmake_flags% -DLLVM_USE_CRT_RELEASE=MT -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
+cmake -GNinja %cmake_flags% -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON ..\llvm || exit /b
 ninja clang_format_vsix || exit /b
 copy ..\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix
 cd ..
 
 
+set "VSCMD_START_DIR=%CD%"
 call "%vsdevcmd%" -arch=amd64
 set CC=
 set CXX=
 mkdir build64_stage0
 cd build64_stage0
-cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% ..\llvm || exit /b
-ninja all || exit /b
+REM Work around VS2017 bug by using MinSizeRel.
+cmake -GNinja %cmake_flags% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
+ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
 ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
 cd..
@@ -104,8 +100,8 @@ mkdir build64
 cd build64
 set CC=..\build64_stage0\bin\clang-cl
 set CXX=..\build64_stage0\bin\clang-cl
-cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% ..\llvm || exit /b
-ninja all || exit /b
+cmake -GNinja %cmake_flags% ..\llvm || exit /b
+ninja all || ninja all || ninja all || exit /b
 ninja check || ninja check || ninja check || exit /b
 ninja check-clang || ninja check-clang || ninja check-clang ||  exit /b
 ninja package || exit /b
diff --git a/utils/release/merge-git.sh b/utils/release/merge-git.sh
new file mode 100755
index 000000000000..81d577db9d62
--- /dev/null
+++ b/utils/release/merge-git.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+#===-- merge-git.sh - Merge commit to the stable branch --------------------===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License.
+#
+#===------------------------------------------------------------------------===#
+#
+# This script will merge an svn revision to a git repo using git-svn while
+# preserving the svn commit message.
+# 
+# NOTE: This script has only been tested with the per-project git repositories
+# and not with the monorepo.
+#
+# In order to use this script, you must:
+# 1) Check out the stable branch you would like to merge the revision into.
+# 2) Correctly configure the branch as an svn-remote by adding the following to
+# your .git/config file for your git repo (replace xy with the major/minor
+# version of the release branch. e.g. release_50 or release_60):
+#
+#[svn-remote "release_xy"]
+#url = https://llvm.org/svn/llvm-project/llvm/branches/release_xy
+#fetch = :refs/remotes/origin/release_xy
+#
+# Once the script completes successfully, you can push your changes with
+# git-svn dcommit
+#
+#===------------------------------------------------------------------------===#
+
+
+usage() {
+    echo "usage: `basename $0` [OPTIONS]"
+    echo "  -rev NUM       The revision to merge into the project"
+}
+
+while [ $# -gt 0 ]; do
+    case $1 in
+        -rev | --rev | -r )
+            shift
+            rev=$1
+            ;;
+        -h | -help | --help )
+            usage; exit 0   # help was requested: stop here, not at the missing-revision error
+            ;;
+        * )
+            echo "unknown option: $1"
+            echo ""
+            usage
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+if [ -z "$rev" ]; then
+    echo "error: need to specify a revision"
+    echo
+    usage
+    exit 1
+fi
+
+# Rebuild revision map
+git svn find-rev r$rev origin/master &>/dev/null
+
+git_hash=`git svn find-rev r$rev origin/master`
+
+if [ -z "$git_hash" ]; then
+    echo "error: could not determine git commit for r$rev"
+    exit 1
+fi
+
+commit_msg=`svn log -r $rev https://llvm.org/svn/llvm-project/`
+ammend="--amend"
+
+git cherry-pick $git_hash
+if [ $? -ne 0 ]; then
+  echo ""
+  echo "** cherry-pick failed enter 'e' to exit or 'c' when you have finished resolving the conflicts:"
+  read option
+  case $option in
+    c)
+      ammend=""
+      ;;
+    *)
+      exit 1
+      ;;
+  esac
+fi
+         
+git commit $ammend -m "Merging r$rev:" -m "$commit_msg"
diff --git a/utils/release/merge-request.sh b/utils/release/merge-request.sh
index 6691b3733bbf..90c7bdd627fd 100755
--- a/utils/release/merge-request.sh
+++ b/utils/release/merge-request.sh
@@ -95,6 +95,9 @@ case $stable_version in
   5.0)
     release_metabug="34492"
     ;;
+  6.0)
+    release_metabug="36649"
+    ;;
   *)
     echo "error: invalid stable version"
     exit 1
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 66a2c578083e..440dee53c1b7 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -33,6 +33,7 @@ do_asserts="no"
 do_compare="yes"
 do_rt="yes"
 do_libs="yes"
+do_libcxxabi="yes"
 do_libunwind="yes"
 do_test_suite="yes"
 do_openmp="yes"
@@ -62,6 +63,7 @@ function usage() {
     echo "                      For example -svn-path trunk or -svn-path branches/release_37"
     echo " -no-rt               Disable check-out & build Compiler-RT"
     echo " -no-libs             Disable check-out & build libcxx/libcxxabi/libunwind"
+    echo " -no-libcxxabi        Disable check-out & build libcxxabi"
     echo " -no-libunwind        Disable check-out & build libunwind"
     echo " -no-test-suite       Disable check-out & build test-suite"
     echo " -no-openmp           Disable check-out & build libomp"
@@ -135,6 +137,9 @@ while [ $# -gt 0 ]; do
         -no-libs )
             do_libs="no"
             ;;
+        -no-libcxxabi )
+            do_libcxxabi="no"
+            ;;
         -no-libunwind )
             do_libunwind="no"
             ;;
@@ -206,7 +211,10 @@ if [ $do_rt = "yes" ]; then
   projects="$projects compiler-rt"
 fi
 if [ $do_libs = "yes" ]; then
-  projects="$projects libcxx libcxxabi"
+  projects="$projects libcxx"
+  if [ $do_libcxxabi = "yes" ]; then
+    projects="$projects libcxxabi"
+  fi
   if [ $do_libunwind = "yes" ]; then
     projects="$projects libunwind"
   fi
diff --git a/utils/sanitizers/ubsan_blacklist.txt b/utils/sanitizers/ubsan_blacklist.txt
index b5bbfddceef6..69230a3e4650 100644
--- a/utils/sanitizers/ubsan_blacklist.txt
+++ b/utils/sanitizers/ubsan_blacklist.txt
@@ -10,3 +10,8 @@ src:*bits/stl_tree.h
 # data() on an empty vector: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829
 src:*bits/stl_iterator.h
 src:*bits/stl_vector.h
+
+# These auto-generated functions compile down to ~50k basic blocks with inlining
+# and UBSan enabled, causing long builds that lead to bot timeouts.
+# https://bugs.llvm.org/show_bug.cgi?id=37929
+fun:*AArch64*InstPrinter*printAliasInstr*
diff --git a/utils/schedcover.py b/utils/schedcover.py
index ad9b78b207f1..8c0aeeb32b53 100644
--- a/utils/schedcover.py
+++ b/utils/schedcover.py
@@ -31,20 +31,26 @@ def filter_model(m):
 def display():
     global table, models
 
+    # remove default and itinerary so we can control their sort order to make
+    # them first
+    models.discard("default")
+    models.discard("itinerary")
+
     ordered_table  = sorted(table.items(), key=operator.itemgetter(0))
-    ordered_models = filter(filter_model, sorted(models))
+    ordered_models = ["itinerary", "default"]
+    ordered_models.extend(sorted(models))
+    ordered_models = filter(filter_model, ordered_models)
 
     # print header
     sys.stdout.write("instruction")
     for model in ordered_models:
-        if not model: model = "default"
         sys.stdout.write(", {}".format(model))
     sys.stdout.write(os.linesep)
 
     for (instr, mapping) in ordered_table:
         sys.stdout.write(instr)
         for model in ordered_models:
-            if model in mapping:
+            if model in mapping and mapping[model] is not None:
                 sys.stdout.write(", {}".format(mapping[model]))
             else:
                 sys.stdout.write(", ")
@@ -57,18 +63,21 @@ def machineModelCover(path):
     re_sched_no_default = re.compile("No machine model for ([^ ]*)\n");
     re_sched_spec = re.compile("InstRW on ([^ ]*) for ([^ ]*) (.*)\n");
     re_sched_no_spec = re.compile("No machine model for ([^ ]*) on processor (.*)\n");
+    re_sched_itin = re.compile("Itinerary for ([^ ]*): ([^ ]*)\n")
 
     # scan the file
     with open(path, 'r') as f:
         for line in f.readlines():
             match = re_sched_default.match(line)
-            if match: add(match.group(1), None, match.group(2))
+            if match: add(match.group(1), "default", match.group(2))
             match = re_sched_no_default.match(line)
-            if match: add(match.group(1), None)
+            if match: add(match.group(1), "default")
             match = re_sched_spec.match(line)
             if match: add(match.group(2), match.group(1), match.group(3))
-            match = re_sched_no_default.match(line)
-            if match: add(match.group(1), None)
+            match = re_sched_no_spec.match(line)
+            if match: add(match.group(1), match.group(2))
+            match = re_sched_itin.match(line)
+            if match: add(match.group(1), "itinerary", match.group(2))
 
     display()
 
diff --git a/utils/unicode-case-fold.py b/utils/unicode-case-fold.py
new file mode 100755
index 000000000000..98c56839c6c8
--- /dev/null
+++ b/utils/unicode-case-fold.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+"""
+Unicode case folding database conversion utility
+
+Parses the database and generates a C++ function which implements the case
+folding algorithm. The database entries are of the form:
+
+  <code>; <status>; <mapping>; # <name>
+
+<status> can be one of four characters:
+  C - Common mappings
+  S - mappings for Simple case folding
+  F - mappings for Full case folding
+  T - special case for Turkish I characters
+
+Right now this generates a function which implements simple case folding (C+S
+entries).
+"""
+
+import sys
+import re
+import urllib2
+
+# This variable will hold the body of the mappings function
+body = ""
+
+# Reads file line-by-line, extracts Common and Simple case fold mappings and
+# yields them as (from_char, to_char, from_name) tuples.
+def mappings(f):
+    previous_from = -1
+    expr = re.compile(r'^(.*); [CS]; (.*); # (.*)')
+    for line in f:
+        m = expr.match(line)
+        if not m: continue
+        from_char = int(m.group(1), 16)
+        to_char = int(m.group(2), 16)
+        from_name = m.group(3)
+
+        if from_char <= previous_from:
+            raise Exception("Duplicate or unsorted characters in input")
+        yield from_char, to_char, from_name
+        previous_from = from_char
+
+# Computes the shift (to_char - from_char) in a mapping.
+def shift(mapping):
+    return mapping[1] - mapping[0]
+
+# Computes the stride (from_char2 - from_char1) of two mappings.
+def stride2(mapping1, mapping2):
+    return mapping2[0] - mapping1[0]
+
+# Computes the stride of a list of mappings. The list should have at least two
+# mappings. All mappings in the list are assumed to have the same stride.
+def stride(block):
+    return stride2(block[0], block[1])
+
+
+# b is a list of mappings. All the mappings are assumed to have the same
+# shift and the stride between adjacent mappings (if any) is constant.
+def dump_block(b):
+    global body
+
+    if len(b) == 1:
+        # Special case for handling blocks of length 1. We don't even need to
+        # emit the "if (C < X) return C" check below as all characters in this
+        # range will be caught by the "C < X" check emitted by the first
+        # non-trivial block.
+        body  += "  // {2}\n  if (C == {0:#06x})\n    return {1:#06x};\n".format(*b[0])
+        return
+
+    first = b[0][0]
+    last = first + stride(b) * (len(b)-1)
+    modulo = first % stride(b)
+
+    # All characters before this block map to themselves.
+    body += "  if (C < {0:#06x})\n    return C;\n".format(first)
+    body += "  // {0} characters\n".format(len(b))
+
+    # Generic pattern: check upper bound (lower bound is checked by the "if"
+    # above) and modulo of C, return C+shift.
+    pattern = "  if (C <= {0:#06x} && C % {1} == {2})\n    return C + {3};\n"
+
+    if stride(b) == 2 and shift(b[0]) == 1 and modulo == 0:
+        # Special case:
+        # We can elide the modulo-check because the expression "C|1" will map
+        # the intervening characters to themselves.
+        pattern = "  if (C <= {0:#06x})\n    return C | 1;\n"
+    elif stride(b) == 1:
+        # Another special case: X % 1 is always zero, so don't emit the
+        # modulo-check.
+        pattern = "  if (C <= {0:#06x})\n    return C + {3};\n"
+
+    body += pattern.format(last, stride(b), modulo, shift(b[0]))
+
+current_block = []
+f = urllib2.urlopen(sys.argv[1])
+for m in mappings(f):
+    if len(current_block) == 0:
+        current_block.append(m)
+        continue
+
+    if shift(current_block[0]) != shift(m):
+        # Incompatible shift, start a new block.
+        dump_block(current_block)
+        current_block = [m]
+        continue
+
+    if len(current_block) == 1 or stride(current_block) == stride2(current_block[-1], m):
+        current_block.append(m)
+        continue
+
+    # Incompatible stride, start a new block.
+    dump_block(current_block)
+    current_block = [m]
+f.close()
+
+dump_block(current_block)
+
+print '//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//'
+print '//'
+print '// This file was generated by utils/unicode-case-fold.py from the Unicode'
+print '// case folding database at'
+print '//   ', sys.argv[1]
+print '//'
+print '// To regenerate this file, run:'
+print '//   utils/unicode-case-fold.py \\'
+print '//     "{}" \\'.format(sys.argv[1])
+print '//     > lib/Support/UnicodeCaseFold.cpp'
+print '//'
+print '//===----------------------------------------------------------------------===//'
+print ''
+print '#include "llvm/Support/Unicode.h"'
+print ''
+print "int llvm::sys::unicode::foldCharSimple(int C) {"
+print body
+print "  return C;"
+print "}"
diff --git a/utils/unittest/googlemock/include/gmock/gmock-matchers.h b/utils/unittest/googlemock/include/gmock/gmock-matchers.h
index 749a30e4e6d8..9f001c9e63c6 100644
--- a/utils/unittest/googlemock/include/gmock/gmock-matchers.h
+++ b/utils/unittest/googlemock/include/gmock/gmock-matchers.h
@@ -2654,7 +2654,7 @@ class WhenSortedByMatcher {
       LhsStlContainerReference lhs_stl_container = LhsView::ConstReference(lhs);
       ::std::vector<LhsValue> sorted_container(lhs_stl_container.begin(),
                                                lhs_stl_container.end());
-      ::std::sort(
+      ::llvm::sort(
            sorted_container.begin(), sorted_container.end(), comparator_);
 
       if (!listener->IsInterested()) {
diff --git a/utils/unittest/googletest/include/gtest/gtest-message.h b/utils/unittest/googletest/include/gtest/gtest-message.h
index 47ed669a9b1b..30cb5ed6993c 100644
--- a/utils/unittest/googletest/include/gtest/gtest-message.h
+++ b/utils/unittest/googletest/include/gtest/gtest-message.h
@@ -49,36 +49,7 @@
 #include <limits>
 
 #include "gtest/internal/gtest-port.h"
-
-#if !GTEST_NO_LLVM_RAW_OSTREAM
-#include "llvm/Support/raw_os_ostream.h"
-
-// LLVM INTERNAL CHANGE: To allow operator<< to work with both
-// std::ostreams and LLVM's raw_ostreams, we define a special
-// std::ostream with an implicit conversion to raw_ostream& and stream
-// to that.  This causes the compiler to prefer std::ostream overloads
-// but still find raw_ostream& overloads.
-namespace llvm {
-class convertible_fwd_ostream : public std::ostream {
-  raw_os_ostream ros_;
-
-public:
-  convertible_fwd_ostream(std::ostream& os)
-    : std::ostream(os.rdbuf()), ros_(*this) {}
-  operator raw_ostream&() { return ros_; }
-};
-}
-template <typename T>
-inline void GTestStreamToHelper(std::ostream& os, const T& val) {
-  llvm::convertible_fwd_ostream cos(os);
-  cos << val;
-}
-#else
-template <typename T>
-inline void GTestStreamToHelper(std::ostream& os, const T& val) {
-  os << val;
-}
-#endif
+#include "gtest/internal/custom/raw-ostream.h"
 
 // Ensures that there is at least one operator<< in the global namespace.
 // See Message& operator<<(...) below for why.
@@ -157,12 +128,8 @@ class GTEST_API_ Message {
     // from the global namespace.  With this using declaration,
     // overloads of << defined in the global namespace and those
     // visible via Koenig lookup are both exposed in this function.
-#if GTEST_NO_LLVM_RAW_OSTREAM
     using ::operator <<;
-    *ss_ << val;
-#else
-    ::GTestStreamToHelper(*ss_, val);
-#endif
+    *ss_ << llvm_gtest::printable(val);
     return *this;
   }
 
@@ -184,11 +151,7 @@ class GTEST_API_ Message {
     if (pointer == NULL) {
       *ss_ << "(null)";
     } else {
-#if GTEST_NO_LLVM_RAW_OSTREAM
-      *ss_ << pointer;
-#else
-      ::GTestStreamToHelper(*ss_, pointer);
-#endif
+      *ss_ << llvm_gtest::printable(pointer);
     }
     return *this;
   }
diff --git a/utils/unittest/googletest/include/gtest/gtest-printers.h b/utils/unittest/googletest/include/gtest/gtest-printers.h
index 8a33164cb38a..be793bbb151e 100644
--- a/utils/unittest/googletest/include/gtest/gtest-printers.h
+++ b/utils/unittest/googletest/include/gtest/gtest-printers.h
@@ -102,6 +102,7 @@
 #include <vector>
 #include "gtest/internal/gtest-port.h"
 #include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/custom/raw-ostream.h"
 
 #if GTEST_HAS_STD_TUPLE_
 # include <tuple>
@@ -246,7 +247,7 @@ void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
   // impossible to define #1 (e.g. when foo is ::std, defining
   // anything in it is undefined behavior unless you are a compiler
   // vendor.).
-  *os << value;
+  *os << ::llvm_gtest::printable(value);
 }
 
 }  // namespace testing_internal
diff --git a/utils/unittest/googletest/include/gtest/internal/custom/raw-ostream.h b/utils/unittest/googletest/include/gtest/internal/custom/raw-ostream.h
new file mode 100644
index 000000000000..fd993db16028
--- /dev/null
+++ b/utils/unittest/googletest/include/gtest/internal/custom/raw-ostream.h
@@ -0,0 +1,74 @@
+//===-- raw-ostream.h - Support for printing using raw_ostream --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file is not part of gtest, but extends it to support LLVM libraries.
+// This is not a public API for testing - it's a detail of LLVM's gtest.
+//
+// gtest allows providing printers for custom types by defining operator<<.
+// In LLVM, operator<< usually takes llvm::raw_ostream&, not std::ostream&.
+//
+// This file defines a template printable(V), which returns a version of V that
+// can be streamed into a std::ostream.
+//
+// This interface is chosen so that in the default case (printable(V) is V),
+// the main gtest code calls operator<<(OS, V) itself. gtest-printers carefully
+// controls the lookup to enable fallback printing (see testing::internal2).
+//===----------------------------------------------------------------------===//
+
+#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_RAW_OSTREAM_H_
+#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_RAW_OSTREAM_H_
+
+namespace llvm_gtest {
+// StreamSwitch<T> is the customization point that decides how a T is written
+// to a std::ostream. The primary template hands the value back untouched.
+template <typename T, typename Enable = void> struct StreamSwitch {
+  static const T &printable(const T &Value) { return Value; }
+};
+
+// printable(Value) yields something that can be streamed into a std::ostream:
+// either Value itself or whatever a StreamSwitch specialization substitutes.
+template <typename T>
+auto printable(const T &Value) -> decltype(StreamSwitch<T>::printable(Value)) {
+  // Routed through the trait so that partial specializations can hook in.
+  return StreamSwitch<T>::printable(Value);
+}
+} // namespace llvm_gtest
+
+// If raw_ostream support is enabled, we specialize for types with operator<<
+// that takes a raw_ostream.
+#if !GTEST_NO_LLVM_RAW_OSTREAM
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+namespace llvm_gtest {
+
+// RawStreamProxy<T> is what printable() returns for a raw_ostream-enabled T.
+// Streaming it into a std::ostream routes the value through raw_os_ostream.
+template <typename T>
+struct RawStreamProxy {
+  const T &V;
+  friend std::ostream &operator<<(std::ostream &Out, const RawStreamProxy &P) {
+    llvm::raw_os_ostream Adapter(Out);
+    Adapter << P.V;
+    return Out;
+  }
+};
+
+// The raw_ostream path is chosen when `(raw_ostream&) << (const T&)` compiles.
+// ConvertibleTo<const T&> stands in for the RHS so that the conversion is
+// already "consumed" and no further implicit one (e.g. to bool!) can kick in.
+template <typename T> struct ConvertibleTo { operator T(); };
+template <typename T>
+struct StreamSwitch<T, decltype((void)(std::declval<llvm::raw_ostream &>()
+                                       << ConvertibleTo<const T &>()))> {
+  static const RawStreamProxy<T> printable(const T &Value) { return {Value}; }
+};
+} // namespace llvm_gtest
+#endif  // !GTEST_NO_LLVM_RAW_OSTREAM
+
+#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_RAW_OSTREAM_H_
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h b/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
index 7a13b4b0de60..184450686c51 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h
@@ -192,7 +192,7 @@ class GTEST_API_ FilePath {
 
   void Normalize();
 
-  // Returns a pointer to the last occurence of a valid path separator in
+  // Returns a pointer to the last occurrence of a valid path separator in
   // the FilePath. On Windows, for example, both '/' and '\' are valid path
   // separators. Returns NULL if no path separator was found.
   const char* FindLastPathSeparator() const;
diff --git a/utils/unittest/googletest/src/gtest-filepath.cc b/utils/unittest/googletest/src/gtest-filepath.cc
index 0292dc11957e..3bb2754885be 100644
--- a/utils/unittest/googletest/src/gtest-filepath.cc
+++ b/utils/unittest/googletest/src/gtest-filepath.cc
@@ -130,7 +130,7 @@ FilePath FilePath::RemoveExtension(const char* extension) const {
   return *this;
 }
 
-// Returns a pointer to the last occurence of a valid path separator in
+// Returns a pointer to the last occurrence of a valid path separator in
 // the FilePath. On Windows, for example, both '/' and '\' are valid path
 // separators. Returns NULL if no path separator was found.
 const char* FilePath::FindLastPathSeparator() const {
diff --git a/utils/unittest/googletest/src/gtest-port.cc b/utils/unittest/googletest/src/gtest-port.cc
index e5bf3dd2be4b..6aeef4957cf5 100644
--- a/utils/unittest/googletest/src/gtest-port.cc
+++ b/utils/unittest/googletest/src/gtest-port.cc
@@ -496,7 +496,7 @@ class ThreadLocalRegistryImpl {
                                  FALSE,
                                  thread_id);
     GTEST_CHECK_(thread != NULL);
-    // We need to to pass a valid thread ID pointer into CreateThread for it
+    // We need to pass a valid thread ID pointer into CreateThread for it
     // to work correctly under Win98.
     DWORD watcher_thread_id;
     HANDLE watcher_thread = ::CreateThread(
diff --git a/utils/update_analyze_test_checks.py b/utils/update_analyze_test_checks.py
new file mode 100755
index 000000000000..b9175ae73274
--- /dev/null
+++ b/utils/update_analyze_test_checks.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python2.7
+
+"""A script to generate FileCheck statements for 'opt' analysis tests.
+
+This script is a utility to update LLVM opt analysis test cases with new
+FileCheck patterns. It can either update all of the tests in the file or
+a single test function.
+
+Example usage:
+$ update_analyze_test_checks.py --opt-binary=../bin/opt test/foo.ll
+
+Workflow:
+1. Make a compiler patch that requires updating some number of FileCheck lines
+   in regression test files.
+2. Save the patch and revert it from your local work area.
+3. Update the RUN-lines in the affected regression tests to look canonical.
+   Example: "; RUN: opt < %s -analyze -cost-model -S | FileCheck %s"
+4. Refresh the FileCheck lines for either the entire file or select functions by
+   running this script.
+5. Commit the fresh baseline of checks.
+6. Apply your patch from step 1 and rebuild your local binaries.
+7. Re-run this script on affected regression tests.
+8. Check the diffs to ensure the script has done something reasonable.
+9. Submit a patch including the regression test diffs for review.
+
+A common pattern is to have the script insert complete checking of every
+instruction. Then, edit it down to only check the relevant instructions.
+The script is designed to make adding checks to a test case fast, it is *not*
+designed to be authoritative about what constitutes a good test!
+"""
+
+import argparse
+import itertools
+import os         # Used to advertise this file's name ("autogenerated_note").
+import string
+import subprocess
+import sys
+import tempfile
+import re
+
+from UpdateTestChecks import common
+
+ADVERT = '; NOTE: Assertions have been autogenerated by '  # Banner prefix.
+
+# Matches the opening line of an IR function definition and captures the
+# function name after '@'. Raw string: '\s', '\w' and '\(' are regex escapes.
+IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
+
+
+
+
+
+def main():
+  """Regenerate the FileCheck lines of each test file named on the command line."""
+  from argparse import RawTextHelpFormatter
+  parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
+  parser.add_argument('-v', '--verbose', action='store_true',
+                      help='Show verbose output')
+  parser.add_argument('--opt-binary', default='opt',
+                      help='The opt binary used to generate the test case')
+  parser.add_argument('--function', help='The function in the test file to update')
+  parser.add_argument('tests', nargs='+')
+  args = parser.parse_args()
+
+  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
+
+  opt_basename = os.path.basename(args.opt_binary)
+  if (opt_basename != "opt"):
+    print >>sys.stderr, 'ERROR: Unexpected opt name: ' + opt_basename
+    sys.exit(1)
+
+  for test in args.tests:
+    if args.verbose:
+      print >>sys.stderr, 'Scanning for RUN lines in test file: %s' % (test,)
+    with open(test) as f:
+      input_lines = [l.rstrip() for l in f]
+
+    raw_lines = [m.group(1)
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
+    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
+    for l in raw_lines[1:]:
+      if run_lines[-1].endswith("\\"):  # Continued RUN line: join with the next.
+        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
+      else:
+        run_lines.append(l)
+
+    if args.verbose:
+      print >>sys.stderr, 'Found %d RUN lines:' % (len(run_lines),)
+      for l in run_lines:
+        print >>sys.stderr, '  RUN: ' + l
+
+    prefix_list = []
+    for l in run_lines:
+      # partition() always returns three parts, so a RUN line without a pipe
+      # gets an empty filecheck_cmd and is skipped below instead of crashing.
+      tool_cmd, _, filecheck_cmd = [cmd.strip() for cmd in l.partition('|')]
+
+      if not tool_cmd.startswith(opt_basename + ' '):
+        print >>sys.stderr, 'WARNING: Skipping non-%s RUN line: %s' % (opt_basename, l)
+        continue
+
+      if not filecheck_cmd.startswith('FileCheck '):
+        print >>sys.stderr, 'WARNING: Skipping non-FileChecked RUN line: ' + l
+        continue
+
+      tool_cmd_args = tool_cmd[len(opt_basename):].strip()
+      tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
+
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
+                               for item in m.group(1).split(',')]
+      if not check_prefixes:
+        check_prefixes = ['CHECK']
+
+      # FIXME: We should use multiple check prefixes to common check lines. For
+      # now, we just ignore all but the last.
+      prefix_list.append((check_prefixes, tool_cmd_args))
+
+    func_dict = {}
+    for prefixes, _ in prefix_list:
+      for prefix in prefixes:
+        func_dict.update({prefix: dict()})
+    for prefixes, opt_args in prefix_list:
+      if args.verbose:
+        print >>sys.stderr, 'Extracted opt cmd: ' + opt_basename + ' ' + opt_args
+        print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
+
+      raw_tool_outputs = common.invoke_tool(args.opt_binary, opt_args, test)
+
+      # Split analysis outputs by "Printing analysis " declarations.
+      for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
+        common.build_function_body_dictionary(
+          common.ANALYZE_FUNCTION_RE, common.scrub_body, [],
+          raw_tool_output, prefixes, func_dict, args.verbose)
+
+    is_in_function = is_in_function_start = False
+    prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes])
+    if args.verbose:
+      print >>sys.stderr, 'Rewriting FileCheck prefixes: %s' % (prefix_set,)
+    output_lines = [autogenerated_note]
+
+    for input_line in input_lines:
+      if is_in_function_start:
+        if input_line == '':
+          continue
+        if input_line.lstrip().startswith(';'):
+          m = common.CHECK_RE.match(input_line)
+          if not m or m.group(1) not in prefix_set:
+            output_lines.append(input_line)
+            continue
+
+        # Print out the various check lines here.
+        common.add_analyze_checks(output_lines, ';', prefix_list, func_dict, func_name)
+        is_in_function_start = False
+
+      if is_in_function:
+        if common.should_add_line_to_output(input_line, prefix_set):
+          # This input line of the function body will go as-is into the output.
+          # Except make leading whitespace uniform: 2 spaces.
+          input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
+          output_lines.append(input_line)
+        else:
+          continue
+        if input_line.strip() == '}':
+          is_in_function = False
+        continue
+
+      # Discard any previous script advertising.
+      if input_line.startswith(ADVERT):
+        continue
+
+      # If it's outside a function, it just gets copied to the output.
+      output_lines.append(input_line)
+
+      m = IR_FUNCTION_RE.match(input_line)
+      if not m:
+        continue
+      func_name = m.group(1)
+      if args.function is not None and func_name != args.function:
+        # When filtering on a specific function, skip all others.
+        continue
+      is_in_function = is_in_function_start = True
+
+    if args.verbose:
+      print>>sys.stderr, 'Writing %d lines to %s...' % (len(output_lines), test)
+
+    with open(test, 'wb') as f:
+      f.writelines([l + '\n' for l in output_lines])
+
+
+if __name__ == '__main__':  # Only run when executed as a script.
+  main()
diff --git a/utils/update_cc_test_checks.py b/utils/update_cc_test_checks.py
new file mode 100755
index 000000000000..865ebf7ff57b
--- /dev/null
+++ b/utils/update_cc_test_checks.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+'''A utility to update LLVM IR CHECK lines in C/C++ FileCheck test files.
+
+Example RUN lines in .c/.cc test files:
+
+// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s
+// RUN: %clangxx -emit-llvm -S %s -o - -O2 | FileCheck -check-prefix=CHECK-A %s
+
+Usage:
+
+% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc
+% utils/update_cc_test_checks.py --c-index-test=release/bin/c-index-test \
+  --clang=release/bin/clang /tmp/c/a.cc
+'''
+
+import argparse
+import collections
+import distutils.spawn
+import os
+import shlex
+import string
+import subprocess
+import sys
+import re
+import tempfile
+
+from UpdateTestChecks import asm, common
+
+ADVERT = '// NOTE: Assertions have been autogenerated by '  # Banner prefix.
+# CHECK_RE captures the prefix of a '//' check line; RUN_LINE_RE the RUN command.
+CHECK_RE = re.compile(r'^\s*//\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+RUN_LINE_RE = re.compile(r'^//\s*RUN:\s*(.*)$')
+# Leading RUN-line tool substitutions mapped to the clang arguments they imply.
+SUBST = {
+    '%clang': [],
+    '%clang_cc1': ['-cc1'],
+    '%clangxx': ['--driver-mode=g++'],
+}
+
+def get_line2spell_and_mangled(args, clang_args):
+  """Maps 0-based definition lines to (spelling, mangled name) via c-index-test."""
+  decl_re = re.compile(r'^FunctionDecl=(\w+):(\d+):\d+ \(Definition\) \[mangled=([^]]+)\]')
+  with tempfile.NamedTemporaryFile() as pch:
+    # TODO Make c-index-test print mangled names without circumventing through precompiled headers
+    pch_cmd = [args.c_index_test, '-write-pch', pch.name] + list(clang_args)
+    status = subprocess.run(pch_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    if status.returncode:
+      sys.stderr.write(status.stdout.decode())
+      sys.exit(2)
+    output = subprocess.check_output([args.c_index_test, '-test-print-mangle', pch.name])
+    if sys.version_info[0] > 2:
+      output = output.decode()
+
+  line2names = {}
+  for decl in output.splitlines():
+    m = decl_re.match(decl)
+    if m is None:
+      continue
+    spell, lineno, mangled = m.groups()
+    # HACK for MacOS (where the mangled name includes an _ for C but the IR won't):
+    if mangled == '_' + spell:
+      mangled = spell
+    # NB: -test-print-mangle omits file names; the line may be from an #include.
+    line2names[int(lineno) - 1] = (spell, mangled)
+  if args.verbose:
+    for idx, names in sorted(line2names.items()):
+      print('line {}: found function {}'.format(idx + 1, names), file=sys.stderr)
+  return line2names
+
+
+def config():
+  """Parses the command line and resolves the clang and c-index-test executables."""
+  parser = argparse.ArgumentParser(description=__doc__,
+                                   formatter_class=argparse.RawTextHelpFormatter)
+  parser.add_argument('-v', '--verbose', action='store_true')
+  parser.add_argument('--llvm-bin', help='llvm $prefix/bin path')
+  parser.add_argument('--clang',
+                      help='"clang" executable, defaults to $llvm_bin/clang')
+  parser.add_argument('--clang-args',
+                      help='Space-separated extra args to clang, e.g. --clang-args=-v')
+  parser.add_argument('--c-index-test',
+                      help='"c-index-test" executable, defaults to $llvm_bin/c-index-test')
+  parser.add_argument(
+      '--functions', nargs='+', help='A list of function name regexes. '
+      'If specified, update CHECK lines for functions matching at least one regex')
+  parser.add_argument(
+      '--x86_extra_scrub', action='store_true',
+      help='Use more regex for x86 matching to reduce diffs between various subtargets')
+  parser.add_argument('tests', nargs='+')
+  args = parser.parse_args()
+  args.clang_args = shlex.split(args.clang_args or '')
+
+  # A tool left unset becomes $llvm_bin/<tool>, or its bare name without --llvm-bin.
+  tools = (
+      ('clang', 'clang'),
+      ('c_index_test', 'c-index-test'),
+  )
+  for attr, tool in tools:
+    path = getattr(args, attr)
+    if path is None:
+      if args.llvm_bin is None:
+        path = tool
+      else:
+        path = os.path.join(args.llvm_bin, tool)
+      setattr(args, attr, path)
+    if not distutils.spawn.find_executable(path):
+      print('Please specify --llvm-bin or --' + tool, file=sys.stderr)
+      sys.exit(1)
+
+  return args
+
+
+def get_function_body(args, filename, clang_args, prefixes, triple_in_cmd, func_dict):
+  """Runs clang on filename and hands its output to the common body extractor."""
+  # TODO Clean up duplication of asm/common build_function_body_dictionary
+  ir_text = common.invoke_tool(args.clang, clang_args, filename)
+  if '-emit-llvm' not in clang_args:
+    # Only IR output is handled; triple_in_cmd is accepted but not used here.
+    print('The clang command line should include -emit-llvm as asm tests '
+          'are discouraged in Clang testsuite.', file=sys.stderr)
+    sys.exit(1)
+  common.build_function_body_dictionary(
+      common.OPT_FUNCTION_RE, common.scrub_body, [],
+      ir_text, prefixes, func_dict, args.verbose)
+
+
+def main():
+  args = config()
+  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
+  # Every test file is handled on its own and rewritten in place.
+  for filename in args.tests:
+    with open(filename) as f:
+      input_lines = [l.rstrip() for l in f]
+
+    # Extract RUN lines, joining any line that ends in a backslash with the next.
+    raw_lines = [m.group(1)
+                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
+    for l in raw_lines[1:]:
+      if run_lines[-1].endswith("\\"):
+        run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
+      else:
+        run_lines.append(l)
+
+    if args.verbose:
+      print('Found {} RUN lines:'.format(len(run_lines)), file=sys.stderr)
+      for l in run_lines:
+        print('  RUN: ' + l, file=sys.stderr)
+
+    # Build a list of clang command lines and check prefixes from RUN lines.
+    run_list = []
+    line2spell_and_mangled_list = collections.defaultdict(list)  # line index -> names
+    for l in run_lines:
+      commands = [cmd.strip() for cmd in l.split('|', 1)]
+
+      triple_in_cmd = None
+      m = common.TRIPLE_ARG_RE.search(commands[0])
+      if m:
+        triple_in_cmd = m.groups()[0]
+
+      # Apply %clang substitution rule, replace %s by `filename`, and append args.clang_args
+      clang_args = shlex.split(commands[0])
+      if clang_args[0] not in SUBST:
+        print('WARNING: Skipping non-clang RUN line: ' + l, file=sys.stderr)
+        continue
+      clang_args[0:1] = SUBST[clang_args[0]]
+      clang_args = [filename if i == '%s' else i for i in clang_args] + args.clang_args
+
+      # Extract -check-prefix in FileCheck args
+      filecheck_cmd = commands[-1]  # With no '|' this is the clang command itself.
+      if not filecheck_cmd.startswith('FileCheck '):
+        print('WARNING: Skipping non-FileChecked RUN line: ' + l, file=sys.stderr)
+        continue
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
+                               for item in m.group(1).split(',')]
+      if not check_prefixes:
+        check_prefixes = ['CHECK']
+      run_list.append((check_prefixes, clang_args, triple_in_cmd))
+    # Strip CHECK lines which are in `prefix_set` (and bare '//' lines) and
+    # rewrite the test file before the tools below are run on it.
+    prefix_set = set([prefix for p in run_list for prefix in p[0]])
+    input_lines = []
+    with open(filename, 'r+') as f:
+      for line in f:
+        m = CHECK_RE.match(line)
+        if not (m and m.group(1) in prefix_set) and line != '//\n':
+          input_lines.append(line)
+      f.seek(0)  # Overwrite in place: rewind, write kept lines, cut the old tail.
+      f.writelines(input_lines)
+      f.truncate()
+
+    # Execute clang, generate LLVM IR, and extract functions.
+    func_dict = {}
+    for p in run_list:
+      prefixes = p[0]
+      for prefix in prefixes:
+        func_dict.update({prefix: dict()})
+    for prefixes, clang_args, triple_in_cmd in run_list:
+      if args.verbose:
+        print('Extracted clang cmd: clang {}'.format(clang_args), file=sys.stderr)
+        print('Extracted FileCheck prefixes: {}'.format(prefixes), file=sys.stderr)
+
+      get_function_body(args, filename, clang_args, prefixes, triple_in_cmd, func_dict)
+
+      # Invoke c-index-test to get mapping from start lines to mangled names.
+      # Forward all clang args for now.
+      for k, v in get_line2spell_and_mangled(args, clang_args).items():
+        line2spell_and_mangled_list[k].append(v)
+    # input_lines mirrors the stripped file, so idx should match the keys above.
+    output_lines = [autogenerated_note]
+    for idx, line in enumerate(input_lines):
+      # Discard any previous script advertising.
+      if line.startswith(ADVERT):
+        continue
+      if idx in line2spell_and_mangled_list:
+        added = set()
+        for spell, mangled in line2spell_and_mangled_list[idx]:
+          # One line may contain multiple function declarations.
+          # Skip if the mangled name has been added before.
+          # The line number may come from an included file,
+          # we simply require the spelling name to appear on the line
+          # to exclude functions from other files.
+          if mangled in added or spell not in line:
+            continue
+          if args.functions is None or any(re.search(regex, spell) for regex in args.functions):
+            if added:
+              output_lines.append('//')  # Separator between check groups on one line.
+            added.add(mangled)
+            common.add_ir_checks(output_lines, '//', run_list, func_dict, mangled)
+      output_lines.append(line.rstrip('\n'))
+
+    # Update the test file.
+    with open(filename, 'w') as f:
+      for line in output_lines:
+        f.write(line + '\n')
+
+  return 0
+
+
+if __name__ == '__main__':  # Only run when executed as a script.
+  sys.exit(main())  # main() returns 0, which becomes the exit status.
diff --git a/utils/update_llc_test_checks.py b/utils/update_llc_test_checks.py
index 57d6e578259c..09b49a763b60 100755
--- a/utils/update_llc_test_checks.py
+++ b/utils/update_llc_test_checks.py
@@ -2,7 +2,7 @@
 
 """A test case update script.
 
-This script is a utility to update LLVM X86 'llc' based test cases with new
+This script is a utility to update LLVM 'llc' based test cases with new
 FileCheck patterns. It can either update all of the tests in the file or
 a single test function.
 """
@@ -14,280 +14,9 @@ import subprocess
 import sys
 import re
 
-# Invoke the tool that is being tested.
-def llc(args, cmd_args, ir):
-  with open(ir) as ir_file:
-    stdout = subprocess.check_output(args.llc_binary + ' ' + cmd_args,
-                                     shell=True, stdin=ir_file)
-  # Fix line endings to unix CR style.
-  stdout = stdout.replace('\r\n', '\n')
-  return stdout
-
-
-# RegEx: this is where the magic happens.
-
-ASM_FUNCTION_X86_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
-    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_ARM_RE = re.compile(
-        r'^(?P<func>[0-9a-zA-Z_]+):\n' # f: (name of function)
-        r'\s+\.fnstart\n' # .fnstart
-        r'(?P<body>.*?)\n' # (body of the function)
-        r'.Lfunc_end[0-9]+:', # .Lfunc_end0: or # -- End function
-        flags=(re.M | re.S))
-
-ASM_FUNCTION_AARCH64_RE = re.compile(
-     r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
-     r'[ \t]+.cfi_startproc\n'
-     r'(?P<body>.*?)\n'
-     # This list is incomplete
-     r'.Lfunc_end[0-9]+:\n',
-     flags=(re.M | re.S))
-
-ASM_FUNCTION_MIPS_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func)
-    r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
-    r'(?P<body>.*?)\n'                        # (body of the function)
-    r'(?:^[ \t]+\.(set|end).*?\n)+'           # Mips+LLVM standard asm epilogue
-    r'(\$|\.L)func_end[0-9]+:\n',             # $func_end0: (mips32 - O32) or
-                                              # .Lfunc_end0: (mips64 - NewABI)
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_PPC_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
-    r'\.Lfunc_begin[0-9]+:\n'
-    r'[ \t]+.cfi_startproc\n'
-    r'(?:\.Lfunc_[gl]ep[0-9]+:\n(?:[ \t]+.*?\n)*)*'
-    r'(?P<body>.*?)\n'
-    # This list is incomplete
-    r'(?:^[ \t]*(?:\.long[ \t]+[^\n]+|\.quad[ \t]+[^\n]+)\n)*'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_RISCV_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
-    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_SYSTEMZ_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
-    r'[ \t]+.cfi_startproc\n'
-    r'(?P<body>.*?)\n'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_LOOP_COMMENT_RE = re.compile(
-    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
-
-SCRUB_X86_SHUFFLES_RE = (
-    re.compile(
-        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
-        flags=re.M))
-SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
-SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
-SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
-SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')
-
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
-TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
-TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
-IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
-
-def scrub_asm_x86(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
-  # Generically match the stack offset of a memory operand.
-  asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
-  # Generically match a RIP-relative memory operand.
-  asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
-  # Generically match a LCP symbol.
-  asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
-  if args.x86_extra_scrub:
-    # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
-    asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
-  # Strip kill operands inserted into the asm.
-  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_arm_eabi(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip kill operands inserted into the asm.
-  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_powerpc64(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Stripe unimportant comments
-  asm = SCRUB_LOOP_COMMENT_RE.sub(r'', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_mips(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_riscv(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_systemz(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-
-# Build up a dictionary of all the function bodies.
-def build_function_body_dictionary(raw_tool_output, triple, prefixes, func_dict,
-                                   args):
-  target_handlers = {
-      'x86_64': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'i686': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
-      'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6t2': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6t2-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8m.base': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8m.main': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armeb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7eb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7eb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE),
-      'powerpc64': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
-      'powerpc64le': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
-      'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
-      'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
-      's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE),
-  }
-  handlers = None
-  for prefix, s in target_handlers.items():
-    if triple.startswith(prefix):
-      handlers = s
-      break
-  else:
-    raise KeyError('Triple %r is not supported' % (triple))
-
-  scrubber, function_re = handlers
-  for m in function_re.finditer(raw_tool_output):
-    if not m:
-      continue
-    func = m.group('func')
-    scrubbed_body = scrubber(m.group('body'), args)
-    if func.startswith('stress'):
-      # We only use the last line of the function body for stress tests.
-      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
-    if args.verbose:
-      print >>sys.stderr, 'Processing function: ' + func
-      for l in scrubbed_body.splitlines():
-        print >>sys.stderr, '  ' + l
-    for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print >>sys.stderr, ('WARNING: Found conflicting asm under the '
-                               'same prefix: %r!' % (prefix,))
-        else:
-          func_dict[prefix][func] = None
-          continue
+from UpdateTestChecks import asm, common
 
-      func_dict[prefix][func] = scrubbed_body
-
-
-def add_checks(output_lines, run_list, func_dict, func_name):
-  printed_prefixes = []
-  for p in run_list:
-    checkprefixes = p[0]
-    for checkprefix in checkprefixes:
-      if checkprefix in printed_prefixes:
-        break
-      if not func_dict[checkprefix][func_name]:
-        continue
-      # Add some space between different check prefixes.
-      if len(printed_prefixes) != 0:
-        output_lines.append(';')
-      printed_prefixes.append(checkprefix)
-      output_lines.append('; %s-LABEL: %s:' % (checkprefix, func_name))
-      func_body = func_dict[checkprefix][func_name].splitlines()
-      output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
-      for func_line in func_body[1:]:
-        output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
-      # Add space between different check prefixes and the first line of code.
-      # output_lines.append(';')
-      break
-  return output_lines
-
-
-def should_add_line_to_output(input_line, prefix_set):
-  # Skip any blank comment lines in the IR.
-  if input_line.strip() == ';':
-    return False
-  # Skip any blank lines in the IR.
-  #if input_line.strip() == '':
-  #  return False
-  # And skip any CHECK lines. We're building our own.
-  m = CHECK_RE.match(input_line)
-  if m and m.group(1) in prefix_set:
-    return False
-
-  return True
+ADVERT = '; NOTE: Assertions have been autogenerated by '
 
 
 def main():
@@ -299,13 +28,17 @@ def main():
   parser.add_argument(
       '--function', help='The function in the test file to update')
   parser.add_argument(
-      '--x86_extra_scrub', action='store_true',
+      '--extra_scrub', action='store_true',
+      help='Always use additional regex to further reduce diffs between various subtargets')
+  parser.add_argument(
+      '--x86_scrub_rip', action='store_true', default=True,
       help='Use more regex for x86 matching to reduce diffs between various subtargets')
+  parser.add_argument(
+      '--no_x86_scrub_rip', action='store_false', dest='x86_scrub_rip')
   parser.add_argument('tests', nargs='+')
   args = parser.parse_args()
 
-  autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
-                        'utils/' + os.path.basename(__file__))
+  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
 
   for test in args.tests:
     if args.verbose:
@@ -315,13 +48,13 @@ def main():
 
     triple_in_ir = None
     for l in input_lines:
-      m = TRIPLE_IR_RE.match(l)
+      m = common.TRIPLE_IR_RE.match(l)
       if m:
         triple_in_ir = m.groups()[0]
         break
 
     raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     for l in raw_lines[1:]:
       if run_lines[-1].endswith("\\"):
@@ -340,7 +73,7 @@ def main():
       llc_cmd = commands[0]
 
       triple_in_cmd = None
-      m = TRIPLE_ARG_RE.search(llc_cmd)
+      m = common.TRIPLE_ARG_RE.search(llc_cmd)
       if m:
         triple_in_cmd = m.groups()[0]
 
@@ -358,7 +91,7 @@ def main():
       llc_cmd_args = llc_cmd[len('llc'):].strip()
       llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip()
 
-      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                                for item in m.group(1).split(',')]
       if not check_prefixes:
         check_prefixes = ['CHECK']
@@ -377,12 +110,12 @@ def main():
         print >>sys.stderr, 'Extracted LLC cmd: llc ' + llc_args
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
 
-      raw_tool_output = llc(args, llc_args, test)
+      raw_tool_output = common.invoke_tool(args.llc_binary, llc_args, test)
       if not (triple_in_cmd or triple_in_ir):
         print >>sys.stderr, "Cannot find a triple. Assume 'x86'"
 
-      build_function_body_dictionary(raw_tool_output,
-          triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict, args)
+      asm.build_function_body_dictionary_for_triple(args, raw_tool_output,
+          triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict)
 
     is_in_function = False
     is_in_function_start = False
@@ -398,17 +131,17 @@ def main():
         if input_line == '':
           continue
         if input_line.lstrip().startswith(';'):
-          m = CHECK_RE.match(input_line)
+          m = common.CHECK_RE.match(input_line)
           if not m or m.group(1) not in prefix_set:
             output_lines.append(input_line)
             continue
 
         # Print out the various check lines here.
-        output_lines = add_checks(output_lines, run_list, func_dict, func_name)
+        asm.add_asm_checks(output_lines, ';', run_list, func_dict, func_name)
         is_in_function_start = False
 
       if is_in_function:
-        if should_add_line_to_output(input_line, prefix_set) == True:
+        if common.should_add_line_to_output(input_line, prefix_set):
           # This input line of the function body will go as-is into the output.
           output_lines.append(input_line)
         else:
@@ -417,13 +150,14 @@ def main():
           is_in_function = False
         continue
 
-      if input_line == autogenerated_note:
+      # Discard any previous script advertising.
+      if input_line.startswith(ADVERT):
         continue
 
       # If it's outside a function, it just gets copied to the output.
       output_lines.append(input_line)
 
-      m = IR_FUNCTION_RE.match(input_line)
+      m = common.IR_FUNCTION_RE.match(input_line)
       if not m:
         continue
       func_name = m.group(1)
diff --git a/utils/update_mca_test_checks.py b/utils/update_mca_test_checks.py
new file mode 100755
index 000000000000..18de299f1ce7
--- /dev/null
+++ b/utils/update_mca_test_checks.py
@@ -0,0 +1,505 @@
+#!/usr/bin/env python2.7
+
+"""A test case update script.
+
+This script is a utility to update LLVM 'llvm-mca' based test cases with new
+FileCheck patterns.
+"""
+
+import argparse
+from collections import defaultdict
+import glob
+import os
+import sys
+import warnings
+
+from UpdateTestChecks import common
+
+
+COMMENT_CHAR = '#'
+ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
+    COMMENT_CHAR)
+ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
+
+
+class Error(Exception):
+  """ Generic Error that can be raised without printing a traceback.
+  """
+  pass
+
+
+def _warn(msg):
+  """ Log a user warning to stderr.
+  """
+  warnings.warn(msg, Warning, stacklevel=2)
+
+
+def _configure_warnings(args):
+  warnings.resetwarnings()
+  if args.w:
+    warnings.simplefilter('ignore')
+  if args.Werror:
+    warnings.simplefilter('error')
+
+
+def _showwarning(message, category, filename, lineno, file=None, line=None):
+  """ Version of warnings.showwarning that won't attempt to print out the
+      line at the location of the warning if the line text is not explicitly
+      specified.
+  """
+  if file is None:
+    file = sys.stderr
+  if line is None:
+    line = ''
+  file.write(warnings.formatwarning(message, category, filename, lineno, line))
+
+
+def _parse_args():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('-v', '--verbose',
+                      action='store_true',
+                      help='show verbose output')
+  parser.add_argument('-w',
+                      action='store_true',
+                      help='suppress warnings')
+  parser.add_argument('-Werror',
+                      action='store_true',
+                      help='promote warnings to errors')
+  parser.add_argument('--llvm-mca-binary',
+                      metavar='<path>',
+                      default='llvm-mca',
+                      help='the binary to use to generate the test case '
+                           '(default: llvm-mca)')
+  parser.add_argument('tests',
+                      metavar='<test-path>',
+                      nargs='+')
+  args = parser.parse_args()
+
+  _configure_warnings(args)
+
+  if not args.llvm_mca_binary:
+    raise Error('--llvm-mca-binary value cannot be empty string')
+
+  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
+    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
+
+  return args
+
+
+def _find_run_lines(input_lines, args):
+  raw_lines = [m.group(1)
+               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
+               if m]
+  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
+  for l in raw_lines[1:]:
+    if run_lines[-1].endswith('\\'):  # one trailing backslash = continuation
+      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
+    else:
+      run_lines.append(l)
+
+  if args.verbose:
+    sys.stderr.write('Found {} RUN line{}:\n'.format(
+        len(run_lines), '' if len(run_lines) == 1 else 's'))
+    for line in run_lines:
+      sys.stderr.write('  RUN: {}\n'.format(line))
+
+  return run_lines
+
+
+def _get_run_infos(run_lines, args):
+  run_infos = []
+  for run_line in run_lines:
+    try:
+      (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
+                                        for cmd in run_line.split('|', 1)])
+    except ValueError:
+      _warn('could not split tool and filecheck commands: {}'.format(run_line))
+      continue
+
+    tool_basename = os.path.basename(args.llvm_mca_binary)
+
+    if not tool_cmd.startswith(tool_basename + ' '):
+      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
+      continue
+
+    if not filecheck_cmd.startswith('FileCheck '):
+      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
+      continue
+
+    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
+    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
+
+    check_prefixes = [item
+                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
+                      for item in m.group(1).split(',')]
+    if not check_prefixes:
+      check_prefixes = ['CHECK']
+
+    run_infos.append((check_prefixes, tool_cmd_args))
+
+  return run_infos
+
+
+def _break_down_block(block_info, common_prefix):
+  """ Given a block_info, see if we can analyze it further to let us break it
+      down by prefix per-line rather than per-block.
+  """
+  texts = block_info.keys()
+  prefixes = list(block_info.values())
+  # Split the lines from each of the incoming block_texts and zip them so that
+  # each element contains the corresponding lines from each text.  E.g.
+  #
+  # block_text_1: A   # line 1
+  #               B   # line 2
+  #
+  # block_text_2: A   # line 1
+  #               C   # line 2
+  #
+  # would become:
+  #
+  # [(A, A),   # line 1
+  #  (B, C)]   # line 2
+  #
+  line_tuples = list(zip(*list((text.splitlines() for text in texts))))
+
+  # To simplify output, we'll only proceed if the very first line of the block
+  # texts is common to each of them.
+  if len(set(line_tuples[0])) != 1:
+    return []
+
+  result = []
+  lresult = defaultdict(list)
+  for i, line in enumerate(line_tuples):
+    if len(set(line)) == 1:
+      # We're about to output a line with the common prefix.  This is a sync
+      # point so flush any batched-up lines one prefix at a time to the output
+      # first.
+      for prefix in sorted(lresult):
+        result.extend(lresult[prefix])
+      lresult = defaultdict(list)
+
+      # The line is common to each block so output with the common prefix.
+      result.append((common_prefix, line[0]))
+    else:
+      # The line is not common to each block, or we don't have a common prefix.
+      # If there are no prefixes available, warn and bail out.
+      if not prefixes[0]:
+        _warn('multiple lines not disambiguated by prefixes:\n{}\n'
+              'Some blocks may be skipped entirely as a result.'.format(
+                  '\n'.join('  - {}'.format(l) for l in line)))
+        return []
+
+      # Iterate through the line from each of the blocks and add the line with
+      # the corresponding prefix to the current batch of results so that we can
+      # later output them per-prefix.
+      for i, l in enumerate(line):
+        for prefix in prefixes[i]:
+          lresult[prefix].append((prefix, l))
+
+  # Flush any remaining batched-up lines one prefix at a time to the output.
+  for prefix in sorted(lresult):
+    result.extend(lresult[prefix])
+  return result
+
+
+def _get_useful_prefix_info(run_infos):
+  """ Given the run_infos, calculate any prefixes that are common to every one,
+      and the length of the longest prefix string.
+  """
+  try:
+    all_sets = [set(s) for s in list(zip(*run_infos))[0]]
+    common_to_all = set.intersection(*all_sets)
+    longest_prefix_len = max(len(p) for p in set.union(*all_sets))
+  except IndexError:
+    common_to_all = []
+    longest_prefix_len = 0
+  else:
+    if len(common_to_all) > 1:
+      _warn('Multiple prefixes common to all RUN lines: {}'.format(
+          common_to_all))
+    if common_to_all:
+      common_to_all = sorted(common_to_all)[0]
+  return common_to_all, longest_prefix_len
+
+
+def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
+  """ For each run line, run the tool with the specified args and collect the
+      output. We use the concept of 'blocks' for uniquing, where a block is
+      a series of lines of text with no more than one newline character between
+      each one.  For example:
+
+      This
+      is
+      one
+      block
+
+      This is
+      another block
+
+      This is yet another block
+
+      We then build up a 'block_infos' structure containing a dict where the
+      text of each block is the key and the value is a list of the sets of
+      prefixes that may generate that block.  This then goes through a series of
+      transformations to minimise the amount of CHECK lines that need to be
+      written by taking advantage of common prefixes.
+  """
+
+  def _block_key(tool_args, prefixes):
+    """ Get a hashable key based on the current tool_args and prefixes.
+    """
+    return ' '.join([tool_args] + prefixes)
+
+  all_blocks = {}
+  max_block_len = 0
+
+  # Run the tool for each run line to generate all of the blocks.
+  for prefixes, tool_args in run_infos:
+    key = _block_key(tool_args, prefixes)
+    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
+                                         tool_args,
+                                         test_path)
+
+    # Replace any lines consisting of purely whitespace with empty lines.
+    raw_tool_output = '\n'.join(line if line.strip() else ''
+                                for line in raw_tool_output.splitlines())
+
+    # Split blocks, stripping all trailing whitespace, but keeping preceding
+    # whitespace except for newlines so that columns will line up visually.
+    all_blocks[key] = [b.lstrip('\n').rstrip()
+                       for b in raw_tool_output.split('\n\n')]
+    max_block_len = max(max_block_len, len(all_blocks[key]))
+
+  # If necessary, pad the lists of blocks with empty blocks so that they are
+  # all the same length.
+  for key in all_blocks:
+    len_to_pad = max_block_len - len(all_blocks[key])
+    all_blocks[key] += [''] * len_to_pad
+
+  # Create the block_infos structure where it is a nested dict in the form of:
+  # block number -> block text -> list of prefix sets
+  block_infos = defaultdict(lambda: defaultdict(list))
+  for prefixes, tool_args in run_infos:
+    key = _block_key(tool_args, prefixes)
+    for block_num, block_text in enumerate(all_blocks[key]):
+      block_infos[block_num][block_text].append(set(prefixes))
+
+  # Now go through the block_infos structure and attempt to smartly prune the
+  # number of prefixes per block to the minimal set possible to output.
+  for block_num in range(len(block_infos)):
+    # When there are multiple block texts for a block num, remove any
+    # prefixes that are common to more than one of them.
+    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
+    all_sets = [s for s in block_infos[block_num].values()]
+    pruned_sets = []
+
+    for i, setlist in enumerate(all_sets):
+      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
+                              for set_ in setlist2 for elem in set_
+                              if i != j])
+      pruned_sets.append([s - other_set_values for s in setlist])
+
+    for i, block_text in enumerate(block_infos[block_num]):
+
+      # When a block text matches multiple sets of prefixes, try removing any
+      # prefixes that aren't common to all of them.
+      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
+      common_values = set.intersection(*pruned_sets[i])
+      if common_values:
+        pruned_sets[i] = [common_values]
+
+      # Everything should be uniqued as much as possible by now.  Apply the
+      # newly pruned sets to the block_infos structure.
+      # If there are any blocks of text that still match multiple prefixes,
+      # output a warning.
+      current_set = set()
+      for s in pruned_sets[i]:
+        s = sorted(list(s))
+        if s:
+          current_set.add(s[0])
+          if len(s) > 1:
+            _warn('Multiple prefixes generating same output: {} '
+                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))
+
+      block_infos[block_num][block_text] = sorted(list(current_set))
+
+    # If we have multiple block_texts, try to break them down further to avoid
+    # the case where we have very similar block_texts repeated after each
+    # other.
+    if common_prefix and len(block_infos[block_num]) > 1:
+      # We'll only attempt this if each of the block_texts have the same number
+      # of lines as each other.
+      same_num_Lines = (len(set(len(k.splitlines())
+                                for k in block_infos[block_num].keys())) == 1)
+      if same_num_Lines:
+        breakdown = _break_down_block(block_infos[block_num], common_prefix)
+        if breakdown:
+          block_infos[block_num] = breakdown
+
+  return block_infos
+
+
+def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
+  """ Write an individual block, with correct padding on the prefixes.
+  """
+  end_prefix = ':     '
+  previous_prefix = None
+  num_lines_of_prefix = 0
+
+  for prefix, line in block:
+    if prefix in not_prefix_set:
+      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
+            'in input file.'.format(prefix))
+      continue
+
+    # If the previous line isn't already blank and we're writing more than one
+    # line for the current prefix output a blank line first, unless either the
+    # current or previous prefix is common to all.
+    num_lines_of_prefix += 1
+    if prefix != previous_prefix:
+      if output and output[-1]:
+        if num_lines_of_prefix > 1 or any(p == common_prefix
+                                          for p in (prefix, previous_prefix)):
+          output.append('')
+      num_lines_of_prefix = 0
+      previous_prefix = prefix
+
+    output.append(
+        '{} {}{}{} {}'.format(COMMENT_CHAR,
+                              prefix,
+                              end_prefix,
+                              ' ' * (prefix_pad - len(prefix)),
+                              line).rstrip())
+    end_prefix = '-NEXT:'
+
+  output.append('')
+
+
+def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
+                  args, common_prefix, prefix_pad):
+  prefix_set = set([prefix for prefixes, _ in prefix_list
+                    for prefix in prefixes])
+  not_prefix_set = set()
+
+  output_lines = []
+  for input_line in input_lines:
+    if input_line.startswith(ADVERT_PREFIX):
+      continue
+
+    if input_line.startswith(COMMENT_CHAR):
+      m = common.CHECK_RE.match(input_line)
+      try:
+        prefix = m.group(1)
+      except AttributeError:  # m is None: not a check-style comment line
+        prefix = None
+
+      if '{}-NOT:'.format(prefix) in input_line:
+        not_prefix_set.add(prefix)
+
+      if prefix not in prefix_set or prefix in not_prefix_set:
+        output_lines.append(input_line)
+        continue
+
+    if common.should_add_line_to_output(input_line, prefix_set):
+      # This input line of the function body will go as-is into the output.
+      # Except make leading whitespace uniform: 2 spaces.
+      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
+
+      # Skip an empty line if the previous output line (if any) is also empty.
+      if input_line or not output_lines or output_lines[-1]:
+        output_lines.append(input_line)
+    else:
+      continue
+
+  # Add a blank line before the new checks if required.
+  if len(output_lines) > 0 and output_lines[-1]:
+    output_lines.append('')
+
+  output_check_lines = []
+  for block_num in range(len(block_infos)):
+    for block_text in sorted(block_infos[block_num]):
+      if not block_text:
+        continue
+
+      if type(block_infos[block_num]) is list:
+        # The block is of the type output from _break_down_block().
+        _write_block(output_check_lines,
+                     block_infos[block_num],
+                     not_prefix_set,
+                     common_prefix,
+                     prefix_pad)
+        break
+      elif block_infos[block_num][block_text]:
+        # _break_down_block() was unable to do anything so output the block
+        # as-is.
+        lines = block_text.split('\n')
+        for prefix in block_infos[block_num][block_text]:
+          _write_block(output_check_lines,
+                       [(prefix, line) for line in lines],
+                       not_prefix_set,
+                       common_prefix,
+                       prefix_pad)
+
+  if output_check_lines:
+    output_lines.insert(0, ADVERT)
+    output_lines.extend(output_check_lines)
+
+  # The file should not end with two newlines. It creates unnecessary churn.
+  while len(output_lines) > 0 and output_lines[-1] == '':
+    output_lines.pop()
+
+  if input_lines == output_lines:
+    sys.stderr.write('            [unchanged]\n')
+    return
+  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))
+
+  if args.verbose:
+    sys.stderr.write(
+        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))
+
+  with open(test_path, 'wb') as f:
+    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
+
+def main():
+  args = _parse_args()
+  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
+  for test_path in test_paths:
+    sys.stderr.write('Test: {}\n'.format(test_path))
+
+    # Call this per test. By default each warning will only be written once
+    # per source location. Reset the warning filter so that now each warning
+    # will be written once per source location per test.
+    _configure_warnings(args)
+
+    if args.verbose:
+      sys.stderr.write(
+          'Scanning for RUN lines in test file: {}\n'.format(test_path))
+
+    if not os.path.isfile(test_path):
+      raise Error('could not find test file: {}'.format(test_path))
+
+    with open(test_path) as f:
+      input_lines = [l.rstrip() for l in f]
+
+    run_lines = _find_run_lines(input_lines, args)
+    run_infos = _get_run_infos(run_lines, args)
+    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
+    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
+    _write_output(test_path,
+                  input_lines,
+                  run_infos,
+                  block_infos,
+                  args,
+                  common_prefix,
+                  prefix_pad)
+
+  return 0
+
+
+if __name__ == '__main__':
+  try:
+    warnings.showwarning = _showwarning  # print warnings without source line
+    sys.exit(main())
+  except Error as e:
+    sys.stderr.write('error: {}\n'.format(e))  # diagnostics go to stderr
+    sys.exit(1)
diff --git a/utils/update_mir_test_checks.py b/utils/update_mir_test_checks.py
index 3756af1b517d..56d236de2491 100755
--- a/utils/update_mir_test_checks.py
+++ b/utils/update_mir_test_checks.py
@@ -26,12 +26,7 @@ import re
 import subprocess
 import sys
 
-RUN_LINE_RE = re.compile('^\s*[;#]\s*RUN:\s*(.*)$')
-TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
-MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
-TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?[= ](\S+)')
-CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+from UpdateTestChecks import common
 
 MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
 MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
@@ -41,23 +36,22 @@ VREG_DEF_RE = re.compile(
     r'^ *(?P<vregs>{0}(?:, {0})*) '
     r'= (?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern))
 MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')
-VREG_CLASS_RE = re.compile(r'^ *- *{ id: ([0-9]+), class: ([a-z0-9_]+)', re.M)
 
 IR_FUNC_NAME_RE = re.compile(
-    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>\w+)\s*\(')
+    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
 IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')
 
 MIR_FUNC_RE = re.compile(
     r'^---$'
     r'\n'
     r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
-    r'(?:.*?(?P<vregs>^ *registers: *(?:\n *- {[^\n]+$)*))?'
     r'.*?'
     r'^ *body: *\|\n'
     r'(?P<body>.*?)\n'
     r'^\.\.\.$',
     flags=(re.M | re.S))
 
+
 class LLC:
     def __init__(self, bin):
         self.bin = bin
@@ -96,7 +90,7 @@ def warn(msg, test_file=None):
 
 def find_triple_in_ir(lines, verbose=False):
     for l in lines:
-        m = TRIPLE_IR_RE.match(l)
+        m = common.TRIPLE_IR_RE.match(l)
         if m:
             return m.group(1)
     return None
@@ -104,7 +98,7 @@ def find_triple_in_ir(lines, verbose=False):
 
 def find_run_lines(test, lines, verbose=False):
     raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     for l in raw_lines[1:]:
         if run_lines[-1].endswith("\\"):
@@ -135,19 +129,21 @@ def build_run_list(test, run_lines, verbose=False):
             continue
 
         triple = None
-        m = TRIPLE_ARG_RE.search(llc_cmd)
+        m = common.TRIPLE_ARG_RE.search(llc_cmd)
         if m:
             triple = m.group(1)
         # If we find -march but not -mtriple, use that.
-        m = MARCH_ARG_RE.search(llc_cmd)
+        m = common.MARCH_ARG_RE.search(llc_cmd)
         if m and not triple:
             triple = '{}--'.format(m.group(1))
 
         cmd_args = llc_cmd[len('llc'):].strip()
         cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()
 
-        check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
-                          for item in m.group(1).split(',')]
+        check_prefixes = [
+            item
+            for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
+            for item in m.group(1).split(',')]
         if not check_prefixes:
             check_prefixes = ['CHECK']
         all_prefixes += check_prefixes
@@ -196,11 +192,10 @@ def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                 warn('Found conflicting asm for prefix: {}'.format(prefix),
                      test_file=test)
             func_dict[prefix][func] = body
-            func_dict[prefix]['{}:vregs'.format(func)] = m.group('vregs')
 
 
 def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
-                            add_vreg_checks, single_bb, verbose=False):
+                            single_bb, verbose=False):
     printed_prefixes = set()
     for run in run_list:
         for prefix in run.prefixes:
@@ -213,17 +208,14 @@ def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
             #     output_lines.append('')
             printed_prefixes.add(prefix)
             log('Adding {} lines for {}'.format(prefix, func_name), verbose)
-            vregs = None
-            if add_vreg_checks:
-                vregs = func_dict[prefix]['{}:vregs'.format(func_name)]
             add_check_lines(test, output_lines, prefix, func_name, single_bb,
-                            func_dict[prefix][func_name].splitlines(), vregs)
+                            func_dict[prefix][func_name].splitlines())
             break
     return output_lines
 
 
 def add_check_lines(test, output_lines, prefix, func_name, single_bb,
-                    func_body, vreg_data):
+                    func_body):
     if single_bb:
         # Don't bother checking the basic block label for a single BB
         func_body.pop(0)
@@ -240,12 +232,6 @@ def add_check_lines(test, output_lines, prefix, func_name, single_bb,
 
     output_lines.append('{}-LABEL: name: {}'.format(check, func_name))
 
-    if vreg_data:
-        output_lines.append('{}: registers:'.format(check))
-        for m in VREG_CLASS_RE.finditer(vreg_data):
-            output_lines.append('{}-NEXT: id: {}, class: {}'.format(
-                check, m.group(1), m.group(2)))
-
     vreg_map = {}
     for func_line in func_body:
         if not func_line.strip():
@@ -298,14 +284,13 @@ def mangle_vreg(opcode, current_names):
 
 def should_add_line_to_output(input_line, prefix_set):
     # Skip any check lines that we're handling.
-    m = CHECK_RE.match(input_line)
+    m = common.CHECK_RE.match(input_line)
     if m and m.group(1) in prefix_set:
         return False
     return True
 
 
-def update_test_file(llc, test, remove_common_prefixes=False,
-                     add_vreg_checks=False, verbose=False):
+def update_test_file(llc, test, remove_common_prefixes=False, verbose=False):
     log('Scanning for RUN lines in test file: {}'.format(test), verbose)
     with open(test) as fd:
         input_lines = [l.rstrip() for l in fd]
@@ -360,7 +345,7 @@ def update_test_file(llc, test, remove_common_prefixes=False,
             if m:
                 state = 'ir function prefix'
                 func_name = m.group('func')
-            if input_line.strip() == '---':
+            if input_line.rstrip('| \r\n') == '---':
                 state = 'document'
             output_lines.append(input_line)
         elif state == 'document':
@@ -384,15 +369,15 @@ def update_test_file(llc, test, remove_common_prefixes=False,
                     continue
                 state = 'mir function body'
                 add_checks_for_function(test, output_lines, run_list,
-                                        func_dict, func_name, add_vreg_checks,
-                                        single_bb=False, verbose=verbose)
+                                        func_dict, func_name, single_bb=False,
+                                        verbose=verbose)
         elif state == 'mir function prefix':
             m = MIR_PREFIX_DATA_RE.match(input_line)
             if not m:
                 state = 'mir function body'
                 add_checks_for_function(test, output_lines, run_list,
-                                        func_dict, func_name, add_vreg_checks,
-                                        single_bb=True, verbose=verbose)
+                                        func_dict, func_name, single_bb=True,
+                                        verbose=verbose)
 
             if should_add_line_to_output(input_line, prefix_set):
                 output_lines.append(input_line)
@@ -407,8 +392,8 @@ def update_test_file(llc, test, remove_common_prefixes=False,
             if not m:
                 state = 'ir function body'
                 add_checks_for_function(test, output_lines, run_list,
-                                        func_dict, func_name, add_vreg_checks,
-                                        single_bb=False, verbose=verbose)
+                                        func_dict, func_name, single_bb=False,
+                                        verbose=verbose)
 
             if should_add_line_to_output(input_line, prefix_set):
                 output_lines.append(input_line)
@@ -436,15 +421,13 @@ def main():
     parser.add_argument('--remove-common-prefixes', action='store_true',
                         help='Remove existing check lines whose prefixes are '
                              'shared between multiple commands')
-    parser.add_argument('--add-vreg-checks', action='store_true',
-                        help='Add checks for the "registers:" block')
     parser.add_argument('tests', nargs='+')
     args = parser.parse_args()
 
     for test in args.tests:
         try:
             update_test_file(args.llc, test, args.remove_common_prefixes,
-                             args.add_vreg_checks, verbose=args.verbose)
+                             verbose=args.verbose)
         except Exception:
             warn('Error processing file', test_file=test)
             raise
diff --git a/utils/update_test_checks.py b/utils/update_test_checks.py
index 073d43d8e4da..739fe045835a 100755
--- a/utils/update_test_checks.py
+++ b/utils/update_test_checks.py
@@ -38,194 +38,16 @@ import sys
 import tempfile
 import re
 
+from UpdateTestChecks import common
+
 ADVERT = '; NOTE: Assertions have been autogenerated by '
 
 # RegEx: this is where the magic happens.
 
-SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
-
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
-OPT_FUNCTION_RE = re.compile(
-    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
-    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
-    flags=(re.M | re.S))
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
-# Match things that look at identifiers, but only if they are followed by
-# spaces, commas, paren, or end of the string
-IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
-
-
-# Invoke the tool that is being tested.
-def invoke_tool(args, cmd_args, ir):
-  with open(ir) as ir_file:
-    stdout = subprocess.check_output(args.opt_binary + ' ' + cmd_args,
-                                     shell=True, stdin=ir_file)
-  # Fix line endings to unix CR style.
-  stdout = stdout.replace('\r\n', '\n')
-  return stdout
-
-
-def scrub_body(body, opt_basename):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
-  # Expand the tabs used for indentation.
-  body = string.expandtabs(body, 2)
-  # Strip trailing whitespace.
-  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
-  return body
-
-
-# Build up a dictionary of all the function bodies.
-def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, opt_basename):
-  func_regex = OPT_FUNCTION_RE
-  for m in func_regex.finditer(raw_tool_output):
-    if not m:
-      continue
-    func = m.group('func')
-    scrubbed_body = scrub_body(m.group('body'), opt_basename)
-    if func.startswith('stress'):
-      # We only use the last line of the function body for stress tests.
-      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
-    if verbose:
-      print >>sys.stderr, 'Processing function: ' + func
-      for l in scrubbed_body.splitlines():
-        print >>sys.stderr, '  ' + l
-    for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print >>sys.stderr, ('WARNING: Found conflicting asm under the '
-                               'same prefix: %r!' % (prefix,))
-        else:
-          func_dict[prefix][func] = None
-          continue
 
-      func_dict[prefix][func] = scrubbed_body
-
-
-# Create a FileCheck variable name based on an IR name.
-def get_value_name(var):
-  if var.isdigit():
-    var = 'TMP' + var
-  var = var.replace('.', '_')
-  return var.upper()
-
-
-# Create a FileCheck variable from regex.
-def get_value_definition(var):
-  return '[[' + get_value_name(var) + ':%.*]]'
-
-
-# Use a FileCheck variable.
-def get_value_use(var):
-  return '[[' + get_value_name(var) + ']]'
-
-# Replace IR value defs and uses with FileCheck variables.
-def genericize_check_lines(lines):
-  # This gets called for each match that occurs in
-  # a line. We transform variables we haven't seen
-  # into defs, and variables we have seen into uses.
-  def transform_line_vars(match):
-    var = match.group(2)
-    if var in vars_seen:
-      rv = get_value_use(var)
-    else:
-      vars_seen.add(var)
-      rv = get_value_definition(var)
-    # re.sub replaces the entire regex match
-    # with whatever you return, so we have
-    # to make sure to hand it back everything
-    # including the commas and spaces.
-    return match.group(1) + rv + match.group(3)
-
-  vars_seen = set()
-  lines_with_def = []
-
-  for i, line in enumerate(lines):
-    # An IR variable named '%.' matches the FileCheck regex string.
-    line = line.replace('%.', '%dot')
-    # Ignore any comments, since the check lines will too.
-    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
-    lines[i] =  IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
-  return lines
-
-
-def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
-  # Label format is based on IR string.
-  check_label_format = "; %s-LABEL: @%s("
-
-  printed_prefixes = []
-  for checkprefixes, _ in prefix_list:
-    for checkprefix in checkprefixes:
-      if checkprefix in printed_prefixes:
-        break
-      if not func_dict[checkprefix][func_name]:
-        continue
-      # Add some space between different check prefixes, but not after the last
-      # check line (before the test code).
-      #if len(printed_prefixes) != 0:
-      #  output_lines.append(';')
-      printed_prefixes.append(checkprefix)
-      output_lines.append(check_label_format % (checkprefix, func_name))
-      func_body = func_dict[checkprefix][func_name].splitlines()
-
-      # For IR output, change all defs to FileCheck variables, so we're immune
-      # to variable naming fashions.
-      func_body = genericize_check_lines(func_body)
-
-      # This could be selectively enabled with an optional invocation argument.
-      # Disabled for now: better to check everything. Be safe rather than sorry.
-
-      # Handle the first line of the function body as a special case because
-      # it's often just noise (a useless asm comment or entry label).
-      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
-      #  is_blank_line = True
-      #else:
-      #  output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
-      #  is_blank_line = False
-
-      is_blank_line = False
-
-      for func_line in func_body:
-        if func_line.strip() == '':
-          is_blank_line = True
-          continue
-        # Do not waste time checking IR comments.
-        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
-
-        # Skip blank lines instead of checking them.
-        if is_blank_line == True:
-          output_lines.append('; %s:       %s' % (checkprefix, func_line))
-        else:
-          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
-        is_blank_line = False
-
-      # Add space between different check prefixes and also before the first
-      # line of code in the test function.
-      output_lines.append(';')
-      break
-  return output_lines
 
 
-def should_add_line_to_output(input_line, prefix_set):
-  # Skip any blank comment lines in the IR.
-  if input_line.strip() == ';':
-    return False
-  # Skip any blank lines in the IR.
-  #if input_line.strip() == '':
-  #  return False
-  # And skip any CHECK lines. We're building our own.
-  m = CHECK_RE.match(input_line)
-  if m and m.group(1) in prefix_set:
-    return False
-
-  return True
 
 
 def main():
@@ -254,7 +76,7 @@ def main():
       input_lines = [l.rstrip() for l in f]
 
     raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     for l in raw_lines[1:]:
       if run_lines[-1].endswith("\\"):
@@ -282,7 +104,7 @@ def main():
       tool_cmd_args = tool_cmd[len(opt_basename):].strip()
       tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
 
-      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                                for item in m.group(1).split(',')]
       if not check_prefixes:
         check_prefixes = ['CHECK']
@@ -300,8 +122,10 @@ def main():
         print >>sys.stderr, 'Extracted opt cmd: ' + opt_basename + ' ' + opt_args
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
 
-      raw_tool_output = invoke_tool(args, opt_args, test)
-      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, opt_basename)
+      raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
+      common.build_function_body_dictionary(
+              common.OPT_FUNCTION_RE, common.scrub_body, [],
+              raw_tool_output, prefixes, func_dict, args.verbose)
 
     is_in_function = False
     is_in_function_start = False
@@ -316,20 +140,20 @@ def main():
         if input_line == '':
           continue
         if input_line.lstrip().startswith(';'):
-          m = CHECK_RE.match(input_line)
+          m = common.CHECK_RE.match(input_line)
           if not m or m.group(1) not in prefix_set:
             output_lines.append(input_line)
             continue
 
         # Print out the various check lines here.
-        output_lines = add_checks(output_lines, prefix_list, func_dict, name, opt_basename)
+        common.add_ir_checks(output_lines, ';', prefix_list, func_dict, func_name)
         is_in_function_start = False
 
       if is_in_function:
-        if should_add_line_to_output(input_line, prefix_set) == True:
+        if common.should_add_line_to_output(input_line, prefix_set):
           # This input line of the function body will go as-is into the output.
           # Except make leading whitespace uniform: 2 spaces.
-          input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
+          input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
           output_lines.append(input_line)
         else:
           continue
@@ -347,8 +171,8 @@ def main():
       m = IR_FUNCTION_RE.match(input_line)
       if not m:
         continue
-      name = m.group(1)
-      if args.function is not None and name != args.function:
+      func_name = m.group(1)
+      if args.function is not None and func_name != args.function:
         # When filtering on a specific function, skip all others.
         continue
       is_in_function = is_in_function_start = True
@@ -362,4 +186,3 @@ def main():
 
 if __name__ == '__main__':
   main()
-
diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim
index 42a4cf3cf495..d58ffb216d56 100644
--- a/utils/vim/syntax/llvm.vim
+++ b/utils/vim/syntax/llvm.vim
@@ -1,7 +1,7 @@
 " Vim syntax file
 " Language:   llvm
 " Maintainer: The LLVM team, http://llvm.org/
-" Version:      $Revision: 310885 $
+" Version:      $Revision: 328211 $
 
 if version < 600
   syntax clear
@@ -75,6 +75,8 @@ syn keyword llvmKeyword
       \ distinct
       \ dllexport
       \ dllimport
+      \ dso_local
+      \ dso_preemptable
       \ except
       \ external
       \ externally_initialized
@@ -135,7 +137,7 @@ syn keyword llvmKeyword
       \ seq_cst
       \ sideeffect
       \ signext
-      \ singlethread
+      \ syncscope
       \ source_filename
       \ speculatable
       \ spir_func
@@ -199,6 +201,7 @@ syn match  llvmSpecialComment /;\s*PR\d*\s*$/
 syn match  llvmSpecialComment /;\s*REQUIRES:.*$/
 syn match  llvmSpecialComment /;\s*RUN:.*$/
 syn match  llvmSpecialComment /;\s*CHECK:.*$/
+syn match  llvmSpecialComment "\v;\s*CHECK-(NEXT|NOT|DAG|SAME|LABEL):.*$"
 syn match  llvmSpecialComment /;\s*XFAIL:.*$/
 
 if version >= 508 || !exists("did_c_syn_inits")
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
index 721eeeecfd98..3688db6f438b 100644
--- a/utils/yaml-bench/YAMLBench.cpp
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -68,7 +68,7 @@ static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
   return os;
 }
 
-/// \brief Pretty print a tag by replacing tag:yaml.org,2002: with !!.
+/// Pretty print a tag by replacing tag:yaml.org,2002: with !!.
 static std::string prettyTag(yaml::Node *N) {
   std::string Tag = N->getVerbatimTag();
   if (StringRef(Tag).startswith("tag:yaml.org,2002:")) {