Diffstat (limited to 'utils'): 67 files changed, 4694 insertions, 881 deletions
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 9e177574625f6..f563cadc92c3d 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -10,7 +10,7 @@ // FileCheck does a line-by line check of a file that validates whether it // contains the expected content. This is useful for regression tests etc. // -// This program exits with an error status of 2 on error, exit status of 0 if +// This program exits with an exit status of 2 on error, exit status of 0 if // the file matched the expected contents, and exit status of 1 if it did not // contain the expected contents. // @@ -73,6 +73,12 @@ static cl::opt<bool> MatchFullLines( "Allows leading and trailing whitespace if --strict-whitespace\n" "is not also passed.")); +static cl::opt<bool> EnableVarScope( + "enable-var-scope", cl::init(false), + cl::desc("Enables scope for regex variables. Variables with names that\n" + "do not start with '$' will be reset at the beginning of\n" + "each CHECK-LABEL block.")); + typedef cl::list<std::string>::const_iterator prefix_iterator; //===----------------------------------------------------------------------===// @@ -263,15 +269,19 @@ bool Pattern::ParsePattern(StringRef PatternStr, StringRef Prefix, // is relaxed, more strict check is performed in \c EvaluateExpression. bool IsExpression = false; for (unsigned i = 0, e = Name.size(); i != e; ++i) { - if (i == 0 && Name[i] == '@') { - if (NameEnd != StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(Name.data()), - SourceMgr::DK_Error, - "invalid name in named regex definition"); - return true; + if (i == 0) { + if (Name[i] == '$') // Global vars start with '$' + continue; + if (Name[i] == '@') { + if (NameEnd != StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + SourceMgr::DK_Error, + "invalid name in named regex definition"); + return true; + } + IsExpression = true; + continue; } - IsExpression = true; - continue; } if (Name[i] != '_' && !isalnum(Name[i]) && (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { @@ -1262,6 +1272,18 @@ static void DumpCommandLine(int argc, char **argv) { errs() << "\n"; } +// Remove local variables from \p VariableTable. Global variables +// (start with '$') are preserved. +static void ClearLocalVars(StringMap<StringRef> &VariableTable) { + SmallVector<StringRef, 16> LocalVars; + for (const auto &Var : VariableTable) + if (Var.first()[0] != '$') + LocalVars.push_back(Var.first()); + + for (const auto &Var : LocalVars) + VariableTable.erase(Var); +} + /// Check the input to FileCheck provided in the \p Buffer against the \p /// CheckStrings read from the check file. 
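As context for the new --enable-var-scope flag above, here is a minimal, hypothetical check file showing the intended behaviour (the RUN line, mnemonics, and register patterns are invented for illustration):

```
; RUN: llc < %s | FileCheck --enable-var-scope %s
; CHECK-LABEL: foo:
; CHECK: mov [[TMP:r[0-9]+]], [[$BASE:r[0-9]+]]
; CHECK: add r0, [[TMP]], [[$BASE]]
; CHECK-LABEL: bar:
;; TMP does not start with '$', so it is reset at the CHECK-LABEL above;
;; only the global [[$BASE]] may still be referenced in this block.
; CHECK: mov r0, [[$BASE]]
```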
/// @@ -1298,6 +1320,9 @@ bool CheckInput(SourceMgr &SM, StringRef Buffer, ++j; } + if (EnableVarScope) + ClearLocalVars(VariableTable); + for (; i != j; ++i) { const CheckString &CheckStr = CheckStrings[i]; diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 1272d2b9f2872..3947d0220ed5e 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -123,9 +123,12 @@ using namespace llvm; #define DEBUG_TYPE "asm-matcher-emitter" +cl::OptionCategory AsmMatcherEmitterCat("Options for -gen-asm-matcher"); + static cl::opt<std::string> -MatchPrefix("match-prefix", cl::init(""), - cl::desc("Only match instructions with the given prefix")); + MatchPrefix("match-prefix", cl::init(""), + cl::desc("Only match instructions with the given prefix"), + cl::cat(AsmMatcherEmitterCat)); namespace { class AsmMatcherInfo; @@ -2784,8 +2787,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } OS << " void convertToMapAndConstraints(unsigned Kind,\n "; OS << " const OperandVector &Operands) override;\n"; - if (HasMnemonicFirst) - OS << " bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);\n"; OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n" << " MCInst &Inst,\n" << " uint64_t &ErrorInfo," @@ -2883,7 +2884,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { StringTable.EmitString(OS); OS << ";\n\n"; - // Emit the static match table; unused classes get initalized to 0 which is + // Emit the static match table; unused classes get initialized to 0 which is // guaranteed to be InvalidMatchClass. // // FIXME: We can reduce the size of this table very easily. First, we change @@ -2967,28 +2968,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << "};\n\n"; } - // A method to determine if a mnemonic is in the list. - if (HasMnemonicFirst) { - OS << "bool " << Target.getName() << ClassName << "::\n" - << "mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {\n"; - OS << " // Find the appropriate table for this asm variant.\n"; - OS << " const MatchEntry *Start, *End;\n"; - OS << " switch (VariantID) {\n"; - OS << " default: llvm_unreachable(\"invalid variant!\");\n"; - for (unsigned VC = 0; VC != VariantCount; ++VC) { - Record *AsmVariant = Target.getAsmParserVariant(VC); - int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); - OS << " case " << AsmVariantNo << ": Start = std::begin(MatchTable" << VC - << "); End = std::end(MatchTable" << VC << "); break;\n"; - } - OS << " }\n"; - OS << " // Search the table.\n"; - OS << " auto MnemonicRange = "; - OS << "std::equal_range(Start, End, Mnemonic, LessOpcode());\n"; - OS << " return MnemonicRange.first != MnemonicRange.second;\n"; - OS << "}\n\n"; - } - // Finally, build the match function. OS << "unsigned " << Target.getName() << ClassName << "::\n" << "MatchInstructionImpl(const OperandVector &Operands,\n"; diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index a7c6104aaa214..40b7857ab994b 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -741,7 +741,7 @@ struct AliasPriorityComparator { if (LHS.second == RHS.second) { // We don't actually care about the order, but for consistency it // shouldn't depend on pointer comparisons. - return LHS.first.TheDef->getName() < RHS.first.TheDef->getName(); + return LessRecordByID()(LHS.first.TheDef, RHS.first.TheDef); } // Aliases with larger priorities should be considered first. 
@@ -813,10 +813,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { // We only consider ReqFeatures predicates if PassSubtarget std::vector<Record *> RF = CGA.TheDef->getValueAsListOfDefs("Predicates"); - std::copy_if(RF.begin(), RF.end(), std::back_inserter(ReqFeatures), - [](Record *R) { - return R->getValueAsBit("AssemblerMatcherPredicate"); - }); + copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) { + return R->getValueAsBit("AssemblerMatcherPredicate"); + }); } unsigned NumMIOps = 0; diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 604e3f188eafd..b2913afae12a4 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_tablegen(llvm-tblgen LLVM IntrinsicEmitter.cpp OptParserEmitter.cpp PseudoLoweringEmitter.cpp + RegisterBankEmitter.cpp RegisterInfoEmitter.cpp SearchableTableEmitter.cpp SubtargetEmitter.cpp @@ -34,6 +35,7 @@ add_tablegen(llvm-tblgen LLVM TableGen.cpp Types.cpp X86DisassemblerTables.cpp + X86EVEX2VEXTablesEmitter.cpp X86ModRMFilters.cpp X86RecognizableInstr.cpp CTagsEmitter.cpp diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index a47662b28558c..013e960657523 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -96,7 +96,7 @@ void CallingConvEmitter::EmitAction(Record *Action, } else if (Action->isSubClassOf("CCIf")) { O << Action->getValueAsString("Predicate"); } else { - Action->dump(); + errs() << *Action; PrintFatalError("Unknown CCPredicateAction!"); } @@ -268,7 +268,7 @@ void CallingConvEmitter::EmitAction(Record *Action, << "LocVT, LocInfo, ArgFlags, State))\n"; O << IndentStr << IndentStr << "return false;\n"; } else { - Action->dump(); + errs() << *Action; PrintFatalError("Unknown CCAction!"); } } diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index b82a76bb035de..972eb9cd3403e 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -580,56 +580,74 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand, return MadeChange; } -/// EnforceVectorSameNumElts - 'this' is now constrained to -/// be a vector with same num elements as VTOperand. -bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand, - TreePattern &TP) { +/// EnforceameNumElts - If VTOperand is a scalar, then 'this' is a scalar. If +/// VTOperand is a vector, then 'this' must have the same number of elements. +bool EEVT::TypeSet::EnforceSameNumElts(EEVT::TypeSet &VTOperand, + TreePattern &TP) { if (TP.hasError()) return false; - // "This" must be a vector and "VTOperand" must be a vector. bool MadeChange = false; - MadeChange |= EnforceVector(TP); - MadeChange |= VTOperand.EnforceVector(TP); - // If we know one of the vector types, it forces the other type to agree. + if (isCompletelyUnknown()) + MadeChange = FillWithPossibleTypes(TP); + + if (VTOperand.isCompletelyUnknown()) + MadeChange = VTOperand.FillWithPossibleTypes(TP); + + // If one contains vectors but the other doesn't pull vectors out. + if (!hasVectorTypes()) + MadeChange |= VTOperand.EnforceScalar(TP); + else if (!hasScalarTypes()) + MadeChange |= VTOperand.EnforceVector(TP); + if (!VTOperand.hasVectorTypes()) + MadeChange |= EnforceScalar(TP); + else if (!VTOperand.hasScalarTypes()) + MadeChange |= EnforceVector(TP); + + // If one type is a vector, make sure the other has the same element count. 
+ // If this a scalar, then we are already done with the above. if (isConcrete()) { MVT IVT = getConcrete(); - unsigned NumElems = IVT.getVectorNumElements(); + if (IVT.isVector()) { + unsigned NumElems = IVT.getVectorNumElements(); - // Only keep types that have same elements as 'this'. - TypeSet InputSet(VTOperand); + // Only keep types that have same elements as 'this'. + TypeSet InputSet(VTOperand); - auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() != NumElems; - }); - MadeChange |= I != VTOperand.TypeVec.end(); - VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end()); + auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) { + return VVT.getVectorNumElements() != NumElems; + }); + MadeChange |= I != VTOperand.TypeVec.end(); + VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end()); - if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same number elements as '" + - getName() + "'"); - return false; + if (VTOperand.TypeVec.empty()) { // FIXME: Really want an SMLoc here! + TP.error("Type inference contradiction found, forcing '" + + InputSet.getName() + "' to have same number elements as '" + + getName() + "'"); + return false; + } } } else if (VTOperand.isConcrete()) { MVT IVT = VTOperand.getConcrete(); - unsigned NumElems = IVT.getVectorNumElements(); + if (IVT.isVector()) { + unsigned NumElems = IVT.getVectorNumElements(); - // Only keep types that have same elements as VTOperand. - TypeSet InputSet(*this); + // Only keep types that have same elements as VTOperand. + TypeSet InputSet(*this); - auto I = remove_if(TypeVec, [NumElems](MVT VVT) { - return VVT.getVectorNumElements() != NumElems; - }); - MadeChange |= I != TypeVec.end(); - TypeVec.erase(I, TypeVec.end()); + auto I = remove_if(TypeVec, [NumElems](MVT VVT) { + return VVT.getVectorNumElements() != NumElems; + }); + MadeChange |= I != TypeVec.end(); + TypeVec.erase(I, TypeVec.end()); - if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! - TP.error("Type inference contradiction found, forcing '" + - InputSet.getName() + "' to have same number elements than '" + - VTOperand.getName() + "'"); - return false; + if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! + TP.error("Type inference contradiction found, forcing '" + + InputSet.getName() + "' to have same number elements than '" + + VTOperand.getName() + "'"); + return false; + } } } @@ -644,6 +662,12 @@ bool EEVT::TypeSet::EnforceSameSize(EEVT::TypeSet &VTOperand, bool MadeChange = false; + if (isCompletelyUnknown()) + MadeChange = FillWithPossibleTypes(TP); + + if (VTOperand.isCompletelyUnknown()) + MadeChange = VTOperand.FillWithPossibleTypes(TP); + // If we know one of the types, it forces the other type agree. if (isConcrete()) { MVT IVT = getConcrete(); @@ -1058,7 +1082,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, getOperandNum(x.SDTCisSameNumEltsAs_Info.OtherOperandNum, N, NodeInfo, OResNo); return OtherNode->getExtType(OResNo). 
- EnforceVectorSameNumElts(NodeToApply->getExtType(ResNo), TP); + EnforceSameNumElts(NodeToApply->getExtType(ResNo), TP); } case SDTCisSameSizeAs: { unsigned OResNo = 0; @@ -1248,7 +1272,7 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) { if (Operator->isSubClassOf("ComplexPattern")) return 1; - Operator->dump(); + errs() << *Operator; PrintFatalError("Unhandled node in GetNumNodeResults"); } @@ -2114,7 +2138,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){ DagInit *Dag = dyn_cast<DagInit>(TheInit); if (!Dag) { - TheInit->dump(); + TheInit->print(errs()); error("Pattern has unexpected init kind!"); } DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator()); diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 97401cd81713e..189d6e382ee7c 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -144,9 +144,10 @@ namespace EEVT { /// be a vector type VT. bool EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VT, TreePattern &TP); - /// EnforceVectorSameNumElts - 'this' is now constrained to - /// be a vector with same num elements as VT. - bool EnforceVectorSameNumElts(EEVT::TypeSet &VT, TreePattern &TP); + /// EnforceSameNumElts - If VTOperand is a scalar, then 'this' is a scalar. + /// If VTOperand is a vector, then 'this' must have the same number of + /// elements. + bool EnforceSameNumElts(EEVT::TypeSet &VT, TreePattern &TP); /// EnforceSameSize - 'this' is now constrained to be the same size as VT. bool EnforceSameSize(EEVT::TypeSet &VT, TreePattern &TP); diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp index 8032d7b3ee95a..60db6c267ad73 100644 --- a/utils/TableGen/CodeGenMapTable.cpp +++ b/utils/TableGen/CodeGenMapTable.cpp @@ -367,7 +367,7 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { ArrayRef<const CodeGenInstruction*> NumberedInstructions = Target.getInstructionsByEnumValue(); - std::string TargetName = Target.getName(); + std::string Namespace = Target.getInstNamespace(); const std::vector<ListInit*> &ValueCols = InstrMapDesc.getValueCols(); unsigned NumCol = ValueCols.size(); unsigned TotalNumInstr = NumberedInstructions.size(); @@ -387,22 +387,22 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { if (ColInstrs[j] != nullptr) { RelExists = 1; OutStr += ", "; - OutStr += TargetName; + OutStr += Namespace; OutStr += "::"; OutStr += ColInstrs[j]->getName(); } else { OutStr += ", (uint16_t)-1U";} } if (RelExists) { - OS << " { " << TargetName << "::" << CurInstr->getName(); + OS << " { " << Namespace << "::" << CurInstr->getName(); OS << OutStr <<" },\n"; TableSize++; } } } if (!TableSize) { - OS << " { " << TargetName << "::" << "INSTRUCTION_LIST_END, "; - OS << TargetName << "::" << "INSTRUCTION_LIST_END }"; + OS << " { " << Namespace << "::" << "INSTRUCTION_LIST_END, "; + OS << Namespace << "::" << "INSTRUCTION_LIST_END }"; } OS << "}; // End of " << InstrMapDesc.getName() << "Table\n\n"; return TableSize; @@ -567,7 +567,7 @@ namespace llvm { //===----------------------------------------------------------------------===// void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { CodeGenTarget Target(Records); - std::string TargetName = Target.getName(); + std::string NameSpace = Target.getInstNamespace(); std::vector<Record*> InstrMapVec; InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping"); @@ -577,7 +577,7 @@ void EmitMapTable(RecordKeeper &Records, 
raw_ostream &OS) { OS << "#ifdef GET_INSTRMAP_INFO\n"; OS << "#undef GET_INSTRMAP_INFO\n"; OS << "namespace llvm {\n\n"; - OS << "namespace " << TargetName << " {\n\n"; + OS << "namespace " << NameSpace << " {\n\n"; // Emit coulumn field names and their values as enums. emitEnums(OS, Records); @@ -600,7 +600,7 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) { // Emit map tables and the functions to query them. IMap.emitTablesWithFunc(OS); } - OS << "} // End " << TargetName << " namespace\n"; + OS << "} // End " << NameSpace << " namespace\n"; OS << "} // End llvm namespace\n"; OS << "#endif // GET_INSTRMAP_INFO\n\n"; } diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index c03e0d1fcf6b1..627614d991d52 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -1668,7 +1668,7 @@ void CodeGenRegBank::computeRegUnitSets() { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) - dbgs() << " " << RegUnits[U].Roots[0]->getName(); + printRegUnitName(U); dbgs() << "\n"; }); @@ -1681,7 +1681,7 @@ void CodeGenRegBank::computeRegUnitSets() { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) - dbgs() << " " << RegUnits[U].Roots[0]->getName(); + printRegUnitName(U); dbgs() << "\n"; } dbgs() << "\nUnion sets:\n"); @@ -1727,7 +1727,7 @@ void CodeGenRegBank::computeRegUnitSets() { DEBUG(dbgs() << "UnitSet " << RegUnitSets.size()-1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) - dbgs() << " " << RegUnits[U].Roots[0]->getName(); + printRegUnitName(U); dbgs() << "\n";); } } @@ -1742,7 +1742,7 @@ void CodeGenRegBank::computeRegUnitSets() { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) - dbgs() << " " << RegUnits[U].Roots[0]->getName(); + printRegUnitName(U); dbgs() << "\n"; }); @@ -1763,8 +1763,8 @@ void CodeGenRegBank::computeRegUnitSets() { continue; DEBUG(dbgs() << "RC " << RC.getName() << " Units: \n"; - for (auto &U : RCRegUnits) - dbgs() << RegUnits[U].getRoots()[0]->getName() << " "; + for (auto U : RCRegUnits) + printRegUnitName(U); dbgs() << "\n UnitSetIDs:"); // Find all supersets. @@ -2170,3 +2170,10 @@ BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) { BV.set(Set[i]->EnumValue); return BV; } + +void CodeGenRegBank::printRegUnitName(unsigned Unit) const { + if (Unit < NumNativeRegUnits) + dbgs() << ' ' << RegUnits[Unit].Roots[0]->getName(); + else + dbgs() << " #" << Unit; +} diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index 3ed26fa401a13..9366838c77cd0 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -735,6 +735,10 @@ namespace llvm { // LaneMask is contained in CoveringLanes will be completely covered by // another sub-register with the same or larger lane mask. LaneBitmask CoveringLanes; + + // Helper function for printing debug information. Handles artificial + // (non-native) reg units. 
+ void printRegUnitName(unsigned Unit) const; }; } // end namespace llvm diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 6503d5af2d48d..d93511b0d8733 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -25,13 +25,18 @@ #include <algorithm> using namespace llvm; +cl::OptionCategory AsmParserCat("Options for -gen-asm-parser"); +cl::OptionCategory AsmWriterCat("Options for -gen-asm-writer"); + static cl::opt<unsigned> -AsmParserNum("asmparsernum", cl::init(0), - cl::desc("Make -gen-asm-parser emit assembly parser #N")); + AsmParserNum("asmparsernum", cl::init(0), + cl::desc("Make -gen-asm-parser emit assembly parser #N"), + cl::cat(AsmParserCat)); static cl::opt<unsigned> -AsmWriterNum("asmwriternum", cl::init(0), - cl::desc("Make -gen-asm-writer emit assembly writer #N")); + AsmWriterNum("asmwriternum", cl::init(0), + cl::desc("Make -gen-asm-writer emit assembly writer #N"), + cl::cat(AsmWriterCat)); /// getValueType - Return the MVT::SimpleValueType that the specified TableGen /// record corresponds to. diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp index d30fc5131cbaf..67e8f15b248e7 100644 --- a/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -11,14 +11,18 @@ // //===----------------------------------------------------------------------===// -#include "DAGISelMatcher.h" #include "CodeGenDAGPatterns.h" +#include "DAGISelMatcher.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" using namespace llvm; @@ -26,10 +30,17 @@ enum { CommentIndent = 30 }; +cl::OptionCategory DAGISelCat("Options for -gen-dag-isel"); + // To reduce generated source code size. 
-static cl::opt<bool> -OmitComments("omit-comments", cl::desc("Do not generate comments"), - cl::init(false)); +static cl::opt<bool> OmitComments("omit-comments", + cl::desc("Do not generate comments"), + cl::init(false), cl::cat(DAGISelCat)); + +static cl::opt<bool> InstrumentCoverage( + "instrument-coverage", + cl::desc("Generates tables to help identify patterns matched"), + cl::init(false), cl::cat(DAGISelCat)); namespace { class MatcherTableEmitter { @@ -52,6 +63,19 @@ class MatcherTableEmitter { DenseMap<Record*, unsigned> NodeXFormMap; std::vector<Record*> NodeXForms; + std::vector<std::string> VecIncludeStrings; + MapVector<std::string, unsigned, StringMap<unsigned> > VecPatterns; + + unsigned getPatternIdxFromTable(std::string &&P, std::string &&include_loc) { + const auto It = VecPatterns.find(P); + if (It == VecPatterns.end()) { + VecPatterns.insert(make_pair(std::move(P), VecPatterns.size())); + VecIncludeStrings.push_back(std::move(include_loc)); + return VecIncludeStrings.size() - 1; + } + return It->second; + } + public: MatcherTableEmitter(const CodeGenDAGPatterns &cgp) : CGP(cgp) {} @@ -62,6 +86,9 @@ public: void EmitPredicateFunctions(formatted_raw_ostream &OS); void EmitHistogram(const Matcher *N, formatted_raw_ostream &OS); + + void EmitPatternMatchTable(raw_ostream &OS); + private: unsigned EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, formatted_raw_ostream &OS); @@ -117,6 +144,14 @@ private: }; } // end anonymous namespace. +static std::string GetPatFromTreePatternNode(const TreePatternNode *N) { + std::string str; + raw_string_ostream Stream(str); + Stream << *N; + Stream.str(); + return str; +} + static unsigned GetVBRSize(unsigned Val) { if (Val <= 127) return 1; @@ -150,6 +185,56 @@ static uint64_t EmitVBRValue(uint64_t Val, raw_ostream &OS) { return NumBytes+1; } +// This is expensive and slow. +static std::string getIncludePath(const Record *R) { + std::string str; + raw_string_ostream Stream(str); + auto Locs = R->getLoc(); + SMLoc L; + if (Locs.size() > 1) { + // Get where the pattern prototype was instantiated + L = Locs[1]; + } else if (Locs.size() == 1) { + L = Locs[0]; + } + unsigned CurBuf = SrcMgr.FindBufferContainingLoc(L); + assert(CurBuf && "Invalid or unspecified location!"); + + Stream << SrcMgr.getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" + << SrcMgr.FindLineNumber(L, CurBuf); + Stream.str(); + return str; +} + +void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) { + + assert(isUInt<16>(VecPatterns.size()) && + "Using only 16 bits to encode offset into Pattern Table"); + assert(VecPatterns.size() == VecIncludeStrings.size() && + "The sizes of Pattern and include vectors should be the same"); + OS << "StringRef getPatternForIndex(unsigned Index) override {\n"; + OS << "static const char * PATTERN_MATCH_TABLE[] = {\n"; + + for (const auto &It : VecPatterns) { + OS << "\"" << It.first << "\",\n"; + } + + OS << "\n};"; + OS << "\nreturn StringRef(PATTERN_MATCH_TABLE[Index]);"; + OS << "\n}"; + + OS << "\nStringRef getIncludePathForIndex(unsigned Index) override {\n"; + OS << "static const char * INCLUDE_PATH_TABLE[] = {\n"; + + for (const auto &It : VecIncludeStrings) { + OS << "\"" << It << "\",\n"; + } + + OS << "\n};"; + OS << "\nreturn StringRef(INCLUDE_PATH_TABLE[Index]);"; + OS << "\n}"; +} + /// EmitMatcher - Emit bytes for the specified matcher and return /// the number of bytes emitted. 
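For orientation, the two overrides emitted by EmitPatternMatchTable above take roughly the following shape in the generated selector; the table entries shown here are placeholders, the real ones being the "src -> dst" pattern strings and "file:line" locations collected through getPatternIdxFromTable():

```cpp
// Sketch of the emitted coverage tables (entries are illustrative only).
StringRef getPatternForIndex(unsigned Index) override {
  static const char *PATTERN_MATCH_TABLE[] = {
      "(add:i32 GPR32:i32:$a, GPR32:i32:$b) -> (ADDWrr:i32 GPR32:i32:$a, GPR32:i32:$b)",
  };
  return StringRef(PATTERN_MATCH_TABLE[Index]);
}

StringRef getIncludePathForIndex(unsigned Index) override {
  static const char *INCLUDE_PATH_TABLE[] = {
      "lib/Target/Foo/FooInstrInfo.td:123",
  };
  return StringRef(INCLUDE_PATH_TABLE[Index]);
}
```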
unsigned MatcherTableEmitter:: @@ -537,6 +622,23 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, case Matcher::EmitNode: case Matcher::MorphNodeTo: { + auto NumCoveredBytes = 0; + if (InstrumentCoverage) { + if (const MorphNodeToMatcher *SNT = dyn_cast<MorphNodeToMatcher>(N)) { + NumCoveredBytes = 3; + OS << "OPC_Coverage, "; + std::string src = + GetPatFromTreePatternNode(SNT->getPattern().getSrcPattern()); + std::string dst = + GetPatFromTreePatternNode(SNT->getPattern().getDstPattern()); + Record *PatRecord = SNT->getPattern().getSrcRecord(); + std::string include_src = getIncludePath(PatRecord); + unsigned Offset = + getPatternIdxFromTable(src + " -> " + dst, std::move(include_src)); + OS << "TARGET_VAL(" << Offset << "),\n"; + OS.PadToColumn(Indent * 2); + } + } const EmitNodeMatcherCommon *EN = cast<EmitNodeMatcherCommon>(N); OS << (isa<EmitNodeMatcher>(EN) ? "OPC_EmitNode" : "OPC_MorphNodeTo"); bool CompressVTs = EN->getNumVTs() < 3; @@ -593,10 +695,26 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, } else OS << '\n'; - return 5 + !CompressVTs + EN->getNumVTs() + NumOperandBytes; + return 5 + !CompressVTs + EN->getNumVTs() + NumOperandBytes + + NumCoveredBytes; } case Matcher::CompleteMatch: { const CompleteMatchMatcher *CM = cast<CompleteMatchMatcher>(N); + auto NumCoveredBytes = 0; + if (InstrumentCoverage) { + NumCoveredBytes = 3; + OS << "OPC_Coverage, "; + std::string src = + GetPatFromTreePatternNode(CM->getPattern().getSrcPattern()); + std::string dst = + GetPatFromTreePatternNode(CM->getPattern().getDstPattern()); + Record *PatRecord = CM->getPattern().getSrcRecord(); + std::string include_src = getIncludePath(PatRecord); + unsigned Offset = + getPatternIdxFromTable(src + " -> " + dst, std::move(include_src)); + OS << "TARGET_VAL(" << Offset << "),\n"; + OS.PadToColumn(Indent * 2); + } OS << "OPC_CompleteMatch, " << CM->getNumResults() << ", "; unsigned NumResultBytes = 0; for (unsigned i = 0, e = CM->getNumResults(); i != e; ++i) @@ -610,7 +728,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, << *CM->getPattern().getDstPattern(); } OS << '\n'; - return 2 + NumResultBytes; + return 2 + NumResultBytes + NumCoveredBytes; } } llvm_unreachable("Unreachable"); @@ -686,8 +804,13 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) { ++NumOps; // Get the chained node too. 
OS << " case " << i << ":\n"; + if (InstrumentCoverage) + OS << " {\n"; OS << " Result.resize(NextRes+" << NumOps << ");\n"; - OS << " return " << P.getSelectFunc(); + if (InstrumentCoverage) + OS << " bool Succeeded = " << P.getSelectFunc(); + else + OS << " return " << P.getSelectFunc(); OS << "("; // If the complex pattern wants the root of the match, pass it in as the @@ -704,6 +827,13 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) { for (unsigned i = 0; i != NumOps; ++i) OS << ", Result[NextRes+" << i << "].first"; OS << ");\n"; + if (InstrumentCoverage) { + OS << " if (Succeeded)\n"; + OS << " dbgs() << \"\\nCOMPLEX_PATTERN: " << P.getSelectFunc() + << "\\n\" ;\n"; + OS << " return Succeeded;\n"; + OS << " }\n"; + } } OS << " }\n"; OS << "}\n\n"; @@ -827,7 +957,7 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher, formatted_raw_ostream OS(O); OS << "// The main instruction selector code.\n"; - OS << "SDNode *SelectCode(SDNode *N) {\n"; + OS << "void SelectCode(SDNode *N) {\n"; MatcherTableEmitter MatcherEmitter(CGP); @@ -842,9 +972,11 @@ void llvm::EmitMatcherTable(const Matcher *TheMatcher, OS << " #undef TARGET_VAL\n"; OS << " SelectCodeCommon(N, MatcherTable,sizeof(MatcherTable));\n"; - OS << " return nullptr;\n"; OS << "}\n"; // Next up, emit the function for node and pattern predicates: MatcherEmitter.EmitPredicateFunctions(OS); + + if (InstrumentCoverage) + MatcherEmitter.EmitPatternMatchTable(OS); } diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp index 783b35e745f8f..0bb656826fbdf 100644 --- a/utils/TableGen/DAGISelMatcherOpt.cpp +++ b/utils/TableGen/DAGISelMatcherOpt.cpp @@ -181,15 +181,21 @@ static Matcher *FindNodeWithKind(Matcher *M, Matcher::KindTy Kind) { /// ABC /// XYZ /// -static void FactorNodes(std::unique_ptr<Matcher> &MatcherPtr) { - // If we reached the end of the chain, we're done. - Matcher *N = MatcherPtr.get(); - if (!N) return; - - // If this is not a push node, just scan for one. - ScopeMatcher *Scope = dyn_cast<ScopeMatcher>(N); - if (!Scope) - return FactorNodes(N->getNextPtr()); +static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) { + // Look for a push node. Iterates instead of recurses to reduce stack usage. + ScopeMatcher *Scope = nullptr; + std::unique_ptr<Matcher> *RebindableMatcherPtr = &InputMatcherPtr; + while (!Scope) { + // If we reached the end of the chain, we're done. + Matcher *N = RebindableMatcherPtr->get(); + if (!N) return; + + // If this is not a push node, just scan for one. + Scope = dyn_cast<ScopeMatcher>(N); + if (!Scope) + RebindableMatcherPtr = &(N->getNextPtr()); + } + std::unique_ptr<Matcher> &MatcherPtr = *RebindableMatcherPtr; // Okay, pull together the children of the scope node into a vector so we can // inspect it more easily. diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp index 43c6a98263302..0e7b0dc09442d 100644 --- a/utils/TableGen/FastISelEmitter.cpp +++ b/utils/TableGen/FastISelEmitter.cpp @@ -1,4 +1,4 @@ -//===- FastISelEmitter.cpp - Generate an instruction selector -------------===// +///===- FastISelEmitter.cpp - Generate an instruction selector -------------===// // // The LLVM Compiler Infrastructure // @@ -640,12 +640,9 @@ void FastISelMap::emitInstructionCode(raw_ostream &OS, OneHadNoPredicate = true; } else { if (OneHadNoPredicate) { - // FIXME: This should be a PrintError once the x86 target - // fixes PR21575. 
- PrintWarning("Multiple instructions match and one with no " - "predicate came before one with a predicate! " - "name:" + Memo.Name + " predicate: " + - PredicateCheck); + PrintFatalError("Multiple instructions match and one with no " + "predicate came before one with a predicate! " + "name:" + Memo.Name + " predicate: " + PredicateCheck); } OS << " if (" + PredicateCheck + ") {\n"; OS << " "; diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp index 2bc6181045c5d..7acc65e349ea6 100644 --- a/utils/TableGen/GlobalISelEmitter.cpp +++ b/utils/TableGen/GlobalISelEmitter.cpp @@ -32,197 +32,1279 @@ #include "CodeGenDAGPatterns.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include <string> +#include <numeric> using namespace llvm; #define DEBUG_TYPE "gisel-emitter" STATISTIC(NumPatternTotal, "Total number of patterns"); -STATISTIC(NumPatternSkipped, "Number of patterns skipped"); +STATISTIC(NumPatternImported, "Number of patterns imported from SelectionDAG"); +STATISTIC(NumPatternImportsSkipped, "Number of SelectionDAG imports skipped"); STATISTIC(NumPatternEmitted, "Number of patterns emitted"); +cl::OptionCategory GlobalISelEmitterCat("Options for -gen-global-isel"); + static cl::opt<bool> WarnOnSkippedPatterns( "warn-on-skipped-patterns", cl::desc("Explain why a pattern was skipped for inclusion " "in the GlobalISel selector"), - cl::init(false)); + cl::init(false), cl::cat(GlobalISelEmitterCat)); namespace { +//===- Helper functions ---------------------------------------------------===// + +/// This class stands in for LLT wherever we want to tablegen-erate an +/// equivalent at compiler run-time. +class LLTCodeGen { +private: + LLT Ty; -class GlobalISelEmitter { public: - explicit GlobalISelEmitter(RecordKeeper &RK); - void run(raw_ostream &OS); + LLTCodeGen(const LLT &Ty) : Ty(Ty) {} -private: - const RecordKeeper &RK; - const CodeGenDAGPatterns CGP; - const CodeGenTarget &Target; + void emitCxxConstructorCall(raw_ostream &OS) const { + if (Ty.isScalar()) { + OS << "LLT::scalar(" << Ty.getSizeInBits() << ")"; + return; + } + if (Ty.isVector()) { + OS << "LLT::vector(" << Ty.getNumElements() << ", " << Ty.getSizeInBits() + << ")"; + return; + } + llvm_unreachable("Unhandled LLT"); + } - /// Keep track of the equivalence between SDNodes and Instruction. - /// This is defined using 'GINodeEquiv' in the target description. - DenseMap<Record *, const CodeGenInstruction *> NodeEquivs; + const LLT &get() const { return Ty; } +}; - void gatherNodeEquivs(); - const CodeGenInstruction *findNodeEquiv(Record *N); +class InstructionMatcher; +class OperandPlaceholder { +private: + enum PlaceholderKind { + OP_MatchReference, + OP_Temporary, + } Kind; + + struct MatchReferenceData { + InstructionMatcher *InsnMatcher; + StringRef InsnVarName; + StringRef SymbolicName; + }; - struct SkipReason { - std::string Reason; + struct TemporaryData { + unsigned OpIdx; }; - /// Analyze pattern \p P, possibly emitting matching code for it to \p OS. - /// Otherwise, return a reason why this pattern was skipped for emission. 
- Optional<SkipReason> runOnPattern(const PatternToMatch &P, - raw_ostream &OS); -}; + union { + struct MatchReferenceData MatchReference; + struct TemporaryData Temporary; + }; -} // end anonymous namespace + OperandPlaceholder(PlaceholderKind Kind) : Kind(Kind) {} -//===- Helper functions ---------------------------------------------------===// +public: + ~OperandPlaceholder() {} + + static OperandPlaceholder + CreateMatchReference(InstructionMatcher *InsnMatcher, + StringRef InsnVarName, StringRef SymbolicName) { + OperandPlaceholder Result(OP_MatchReference); + Result.MatchReference.InsnMatcher = InsnMatcher; + Result.MatchReference.InsnVarName = InsnVarName; + Result.MatchReference.SymbolicName = SymbolicName; + return Result; + } + + static OperandPlaceholder CreateTemporary(unsigned OpIdx) { + OperandPlaceholder Result(OP_Temporary); + Result.Temporary.OpIdx = OpIdx; + return Result; + } + + void emitCxxValueExpr(raw_ostream &OS) const; +}; /// Convert an MVT to an equivalent LLT if possible, or the invalid LLT() for /// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...). -static Optional<std::string> MVTToLLT(MVT::SimpleValueType SVT) { - std::string TyStr; - raw_string_ostream OS(TyStr); +static Optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) { MVT VT(SVT); - if (VT.isVector() && VT.getVectorNumElements() != 1) { - OS << "LLT::vector(" << VT.getVectorNumElements() << ", " - << VT.getScalarSizeInBits() << ")"; - } else if (VT.isInteger() || VT.isFloatingPoint()) { - OS << "LLT::scalar(" << VT.getSizeInBits() << ")"; - } else { - return None; + if (VT.isVector() && VT.getVectorNumElements() != 1) + return LLTCodeGen(LLT::vector(VT.getVectorNumElements(), VT.getScalarSizeInBits())); + if (VT.isInteger() || VT.isFloatingPoint()) + return LLTCodeGen(LLT::scalar(VT.getSizeInBits())); + return None; +} + +static std::string explainPredicates(const TreePatternNode *N) { + std::string Explanation = ""; + StringRef Separator = ""; + for (const auto &P : N->getPredicateFns()) { + Explanation += + (Separator + P.getOrigPatFragRecord()->getRecord()->getName()).str(); + if (P.isAlwaysTrue()) + Explanation += " always-true"; + if (P.isImmediatePattern()) + Explanation += " immediate"; } - OS.flush(); - return TyStr; + return Explanation; +} + +static std::string explainRulePredicates(const ArrayRef<Init *> Predicates) { + std::string Explanation = ""; + StringRef Separator = ""; + for (const auto *P : Predicates) { + Explanation += Separator; + + if (const DefInit *PDef = dyn_cast<DefInit>(P)) { + Explanation += PDef->getDef()->getName(); + } else + Explanation += "<unknown>"; + } + return Explanation; +} + +std::string explainOperator(Record *Operator) { + if (Operator->isSubClassOf("SDNode")) + return " (" + Operator->getValueAsString("Opcode") + ")"; + + if (Operator->isSubClassOf("Intrinsic")) + return (" (Operator is an Intrinsic, " + Operator->getName() + ")").str(); + + return " (Operator not understood)"; +} + +/// Helper function to let the emitter report skip reason error messages. 
+static Error failedImport(const Twine &Reason) { + return make_error<StringError>(Reason, inconvertibleErrorCode()); } -static bool isTrivialOperatorNode(const TreePatternNode *N) { - return !N->isLeaf() && !N->hasAnyPredicate() && !N->getTransformFn(); +static Error isTrivialOperatorNode(const TreePatternNode *N) { + std::string Explanation = ""; + std::string Separator = ""; + if (N->isLeaf()) { + Explanation = "Is a leaf"; + Separator = ", "; + } + + if (N->hasAnyPredicate()) { + Explanation = Separator + "Has a predicate (" + explainPredicates(N) + ")"; + Separator = ", "; + } + + if (N->getTransformFn()) { + Explanation += Separator + "Has a transform function"; + Separator = ", "; + } + + if (!N->isLeaf() && !N->hasAnyPredicate() && !N->getTransformFn()) + return Error::success(); + + return failedImport(Explanation); } //===- Matchers -----------------------------------------------------------===// -struct Matcher { - virtual ~Matcher() {} - virtual void emit(raw_ostream &OS) const = 0; +class OperandMatcher; +class MatchAction; + +/// Generates code to check that a match rule matches. +class RuleMatcher { + /// A list of matchers that all need to succeed for the current rule to match. + /// FIXME: This currently supports a single match position but could be + /// extended to support multiple positions to support div/rem fusion or + /// load-multiple instructions. + std::vector<std::unique_ptr<InstructionMatcher>> Matchers; + + /// A list of actions that need to be taken when all predicates in this rule + /// have succeeded. + std::vector<std::unique_ptr<MatchAction>> Actions; + + /// A map of instruction matchers to the local variables created by + /// emitCxxCaptureStmts(). + std::map<const InstructionMatcher *, std::string> InsnVariableNames; + + /// ID for the next instruction variable defined with defineInsnVar() + unsigned NextInsnVarID; + +public: + RuleMatcher() + : Matchers(), Actions(), InsnVariableNames(), NextInsnVarID(0) {} + RuleMatcher(RuleMatcher &&Other) = default; + RuleMatcher &operator=(RuleMatcher &&Other) = default; + + InstructionMatcher &addInstructionMatcher(); + + template <class Kind, class... Args> Kind &addAction(Args &&... args); + + std::string defineInsnVar(raw_ostream &OS, const InstructionMatcher &Matcher, + StringRef Value); + StringRef getInsnVarName(const InstructionMatcher &InsnMatcher) const; + + void emitCxxCapturedInsnList(raw_ostream &OS); + void emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr); + + void emit(raw_ostream &OS); + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. + bool isHigherPriorityThan(const RuleMatcher &B) const; + + /// Report the maximum number of temporary operands needed by the rule + /// matcher. + unsigned countTemporaryOperands() const; }; -raw_ostream &operator<<(raw_ostream &S, const Matcher &M) { - M.emit(S); - return S; -} +template <class PredicateTy> class PredicateListMatcher { +private: + typedef std::vector<std::unique_ptr<PredicateTy>> PredicateVec; + PredicateVec Predicates; -struct MatchAction { - virtual ~MatchAction() {} - virtual void emit(raw_ostream &OS) const = 0; +public: + /// Construct a new operand predicate and add it to the matcher. + template <class Kind, class... Args> + Kind &addPredicate(Args&&... 
args) { + Predicates.emplace_back( + llvm::make_unique<Kind>(std::forward<Args>(args)...)); + return *static_cast<Kind *>(Predicates.back().get()); + } + + typename PredicateVec::const_iterator predicates_begin() const { return Predicates.begin(); } + typename PredicateVec::const_iterator predicates_end() const { return Predicates.end(); } + iterator_range<typename PredicateVec::const_iterator> predicates() const { + return make_range(predicates_begin(), predicates_end()); + } + typename PredicateVec::size_type predicates_size() const { return Predicates.size(); } + + /// Emit a C++ expression that tests whether all the predicates are met. + template <class... Args> + void emitCxxPredicateListExpr(raw_ostream &OS, Args &&... args) const { + if (Predicates.empty()) { + OS << "true"; + return; + } + + StringRef Separator = ""; + for (const auto &Predicate : predicates()) { + OS << Separator << "("; + Predicate->emitCxxPredicateExpr(OS, std::forward<Args>(args)...); + OS << ")"; + Separator = " &&\n"; + } + } }; -raw_ostream &operator<<(raw_ostream &S, const MatchAction &A) { - A.emit(S); - return S; -} +/// Generates code to check a predicate of an operand. +/// +/// Typical predicates include: +/// * Operand is a particular register. +/// * Operand is assigned a particular register bank. +/// * Operand is an MBB. +class OperandPredicateMatcher { +public: + /// This enum is used for RTTI and also defines the priority that is given to + /// the predicate when generating the matcher code. Kinds with higher priority + /// must be tested first. + /// + /// The relative priority of OPM_LLT, OPM_RegBank, and OPM_MBB do not matter + /// but OPM_Int must have priority over OPM_RegBank since constant integers + /// are represented by a virtual register defined by a G_CONSTANT instruction. + enum PredicateKind { + OPM_ComplexPattern, + OPM_Instruction, + OPM_Int, + OPM_LLT, + OPM_RegBank, + OPM_MBB, + }; -struct MatchOpcode : public Matcher { - MatchOpcode(const CodeGenInstruction *I) : I(I) {} - const CodeGenInstruction *I; +protected: + PredicateKind Kind; + +public: + OperandPredicateMatcher(PredicateKind Kind) : Kind(Kind) {} + virtual ~OperandPredicateMatcher() {} + + PredicateKind getKind() const { return Kind; } + + /// Return the OperandMatcher for the specified operand or nullptr if there + /// isn't one by that name in this operand predicate matcher. + /// + /// InstructionOperandMatcher is the only subclass that can return non-null + /// for this. + virtual Optional<const OperandMatcher *> + getOptionalOperand(StringRef SymbolicName) const { + assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); + return None; + } + + /// Emit C++ statements to capture instructions into local variables. + /// + /// Only InstructionOperandMatcher needs to do anything for this method. + virtual void emitCxxCaptureStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef Expr) const {} + + /// Emit a C++ expression that checks the predicate for the given operand. + virtual void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const = 0; + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. + virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const { + return Kind < B.Kind; + }; + + /// Report the maximum number of temporary operands needed by the predicate + /// matcher. 
+ virtual unsigned countTemporaryOperands() const { return 0; } +}; + +/// Generates code to check that an operand is a particular LLT. +class LLTOperandMatcher : public OperandPredicateMatcher { +protected: + LLTCodeGen Ty; - virtual void emit(raw_ostream &OS) const { - OS << "I.getOpcode() == " << I->Namespace << "::" << I->TheDef->getName(); +public: + LLTOperandMatcher(const LLTCodeGen &Ty) + : OperandPredicateMatcher(OPM_LLT), Ty(Ty) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_LLT; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << "MRI.getType(" << OperandExpr << ".getReg()) == ("; + Ty.emitCxxConstructorCall(OS); + OS << ")"; } }; -struct MatchRegOpType : public Matcher { - MatchRegOpType(unsigned OpIdx, std::string Ty) - : OpIdx(OpIdx), Ty(Ty) {} - unsigned OpIdx; - std::string Ty; +/// Generates code to check that an operand is a particular target constant. +class ComplexPatternOperandMatcher : public OperandPredicateMatcher { +protected: + const OperandMatcher &Operand; + const Record &TheDef; + + unsigned getNumOperands() const { + return TheDef.getValueAsDag("Operands")->getNumArgs(); + } - virtual void emit(raw_ostream &OS) const { - OS << "MRI.getType(I.getOperand(" << OpIdx << ").getReg()) == (" << Ty - << ")"; + unsigned getAllocatedTemporariesBaseID() const; + +public: + ComplexPatternOperandMatcher(const OperandMatcher &Operand, + const Record &TheDef) + : OperandPredicateMatcher(OPM_ComplexPattern), Operand(Operand), + TheDef(TheDef) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_ComplexPattern; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << TheDef.getValueAsString("MatcherFn") << "(" << OperandExpr; + for (unsigned I = 0; I < getNumOperands(); ++I) { + OS << ", "; + OperandPlaceholder::CreateTemporary(getAllocatedTemporariesBaseID() + I) + .emitCxxValueExpr(OS); + } + OS << ")"; + } + + unsigned countTemporaryOperands() const override { + return getNumOperands(); } }; -struct MatchRegOpBank : public Matcher { - MatchRegOpBank(unsigned OpIdx, const CodeGenRegisterClass &RC) - : OpIdx(OpIdx), RC(RC) {} - unsigned OpIdx; +/// Generates code to check that an operand is in a particular register bank. +class RegisterBankOperandMatcher : public OperandPredicateMatcher { +protected: const CodeGenRegisterClass &RC; - virtual void emit(raw_ostream &OS) const { +public: + RegisterBankOperandMatcher(const CodeGenRegisterClass &RC) + : OperandPredicateMatcher(OPM_RegBank), RC(RC) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_RegBank; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { OS << "(&RBI.getRegBankFromRegClass(" << RC.getQualifiedName() - << "RegClass) == RBI.getRegBank(I.getOperand(" << OpIdx - << ").getReg(), MRI, TRI))"; + << "RegClass) == RBI.getRegBank(" << OperandExpr + << ".getReg(), MRI, TRI))"; } }; -struct MatchMBBOp : public Matcher { - MatchMBBOp(unsigned OpIdx) : OpIdx(OpIdx) {} +/// Generates code to check that an operand is a basic block. 
+class MBBOperandMatcher : public OperandPredicateMatcher { +public: + MBBOperandMatcher() : OperandPredicateMatcher(OPM_MBB) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_MBB; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << OperandExpr << ".isMBB()"; + } +}; + +/// Generates code to check that an operand is a particular int. +class IntOperandMatcher : public OperandPredicateMatcher { +protected: + int64_t Value; + +public: + IntOperandMatcher(int64_t Value) + : OperandPredicateMatcher(OPM_Int), Value(Value) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_Int; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << "isOperandImmEqual(" << OperandExpr << ", " << Value << ", MRI)"; + } +}; + +/// Generates code to check that a set of predicates match for a particular +/// operand. +class OperandMatcher : public PredicateListMatcher<OperandPredicateMatcher> { +protected: + InstructionMatcher &Insn; unsigned OpIdx; + std::string SymbolicName; + + /// The index of the first temporary variable allocated to this operand. The + /// number of allocated temporaries can be found with + /// countTemporaryOperands(). + unsigned AllocatedTemporariesBaseID; + +public: + OperandMatcher(InstructionMatcher &Insn, unsigned OpIdx, + const std::string &SymbolicName, + unsigned AllocatedTemporariesBaseID) + : Insn(Insn), OpIdx(OpIdx), SymbolicName(SymbolicName), + AllocatedTemporariesBaseID(AllocatedTemporariesBaseID) {} + + bool hasSymbolicName() const { return !SymbolicName.empty(); } + const StringRef getSymbolicName() const { return SymbolicName; } + void setSymbolicName(StringRef Name) { + assert(SymbolicName.empty() && "Operand already has a symbolic name"); + SymbolicName = Name; + } + unsigned getOperandIndex() const { return OpIdx; } + + std::string getOperandExpr(StringRef InsnVarName) const { + return (InsnVarName + ".getOperand(" + llvm::to_string(OpIdx) + ")").str(); + } + + Optional<const OperandMatcher *> + getOptionalOperand(StringRef DesiredSymbolicName) const { + assert(!DesiredSymbolicName.empty() && "Cannot lookup unnamed operand"); + if (DesiredSymbolicName == SymbolicName) + return this; + for (const auto &OP : predicates()) { + const auto &MaybeOperand = OP->getOptionalOperand(DesiredSymbolicName); + if (MaybeOperand.hasValue()) + return MaybeOperand.getValue(); + } + return None; + } + + InstructionMatcher &getInstructionMatcher() const { return Insn; } + + /// Emit C++ statements to capture instructions into local variables. + void emitCxxCaptureStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const { + for (const auto &Predicate : predicates()) + Predicate->emitCxxCaptureStmts(OS, Rule, OperandExpr); + } + + /// Emit a C++ expression that tests whether the instruction named in + /// InsnVarName matches all the predicate and all the operands. + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef InsnVarName) const { + OS << "(/* "; + if (SymbolicName.empty()) + OS << "Operand " << OpIdx; + else + OS << SymbolicName; + OS << " */ "; + emitCxxPredicateListExpr(OS, Rule, getOperandExpr(InsnVarName)); + OS << ")"; + } + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. 
+ bool isHigherPriorityThan(const OperandMatcher &B) const { + // Operand matchers involving more predicates have higher priority. + if (predicates_size() > B.predicates_size()) + return true; + if (predicates_size() < B.predicates_size()) + return false; + + // This assumes that predicates are added in a consistent order. + for (const auto &Predicate : zip(predicates(), B.predicates())) { + if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate))) + return true; + if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate))) + return false; + } + + return false; + }; + + /// Report the maximum number of temporary operands needed by the operand + /// matcher. + unsigned countTemporaryOperands() const { + return std::accumulate( + predicates().begin(), predicates().end(), 0, + [](unsigned A, + const std::unique_ptr<OperandPredicateMatcher> &Predicate) { + return A + Predicate->countTemporaryOperands(); + }); + } - virtual void emit(raw_ostream &OS) const { - OS << "I.getOperand(" << OpIdx << ").isMBB()"; + unsigned getAllocatedTemporariesBaseID() const { + return AllocatedTemporariesBaseID; } }; -struct MutateOpcode : public MatchAction { - MutateOpcode(const CodeGenInstruction *I) : I(I) {} +unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const { + return Operand.getAllocatedTemporariesBaseID(); +} + +/// Generates code to check a predicate on an instruction. +/// +/// Typical predicates include: +/// * The opcode of the instruction is a particular value. +/// * The nsw/nuw flag is/isn't set. +class InstructionPredicateMatcher { +protected: + /// This enum is used for RTTI and also defines the priority that is given to + /// the predicate when generating the matcher code. Kinds with higher priority + /// must be tested first. + enum PredicateKind { + IPM_Opcode, + }; + + PredicateKind Kind; + +public: + InstructionPredicateMatcher(PredicateKind Kind) : Kind(Kind) {} + virtual ~InstructionPredicateMatcher() {} + + PredicateKind getKind() const { return Kind; } + + /// Emit a C++ expression that tests whether the instruction named in + /// InsnVarName matches the predicate. + virtual void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef InsnVarName) const = 0; + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. + virtual bool isHigherPriorityThan(const InstructionPredicateMatcher &B) const { + return Kind < B.Kind; + }; + + /// Report the maximum number of temporary operands needed by the predicate + /// matcher. + virtual unsigned countTemporaryOperands() const { return 0; } +}; + +/// Generates code to check the opcode of an instruction. +class InstructionOpcodeMatcher : public InstructionPredicateMatcher { +protected: const CodeGenInstruction *I; - virtual void emit(raw_ostream &OS) const { - OS << "I.setDesc(TII.get(" << I->Namespace << "::" << I->TheDef->getName() - << "));"; +public: + InstructionOpcodeMatcher(const CodeGenInstruction *I) + : InstructionPredicateMatcher(IPM_Opcode), I(I) {} + + static bool classof(const InstructionPredicateMatcher *P) { + return P->getKind() == IPM_Opcode; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef InsnVarName) const override { + OS << InsnVarName << ".getOpcode() == " << I->Namespace + << "::" << I->TheDef->getName(); } + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. 
+ bool isHigherPriorityThan(const InstructionPredicateMatcher &B) const override { + if (InstructionPredicateMatcher::isHigherPriorityThan(B)) + return true; + if (B.InstructionPredicateMatcher::isHigherPriorityThan(*this)) + return false; + + // Prioritize opcodes for cosmetic reasons in the generated source. Although + // this is cosmetic at the moment, we may want to drive a similar ordering + // using instruction frequency information to improve compile time. + if (const InstructionOpcodeMatcher *BO = + dyn_cast<InstructionOpcodeMatcher>(&B)) + return I->TheDef->getName() < BO->I->TheDef->getName(); + + return false; + }; +}; + +/// Generates code to check that a set of predicates and operands match for a +/// particular instruction. +/// +/// Typical predicates include: +/// * Has a specific opcode. +/// * Has an nsw/nuw flag or doesn't. +class InstructionMatcher + : public PredicateListMatcher<InstructionPredicateMatcher> { +protected: + typedef std::vector<std::unique_ptr<OperandMatcher>> OperandVec; + + /// The operands to match. All rendered operands must be present even if the + /// condition is always true. + OperandVec Operands; + +public: + /// Add an operand to the matcher. + OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName, + unsigned AllocatedTemporariesBaseID) { + Operands.emplace_back(new OperandMatcher(*this, OpIdx, SymbolicName, + AllocatedTemporariesBaseID)); + return *Operands.back(); + } + + OperandMatcher &getOperand(unsigned OpIdx) { + auto I = std::find_if(Operands.begin(), Operands.end(), + [&OpIdx](const std::unique_ptr<OperandMatcher> &X) { + return X->getOperandIndex() == OpIdx; + }); + if (I != Operands.end()) + return **I; + llvm_unreachable("Failed to lookup operand"); + } + + Optional<const OperandMatcher *> + getOptionalOperand(StringRef SymbolicName) const { + assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); + for (const auto &Operand : Operands) { + const auto &OM = Operand->getOptionalOperand(SymbolicName); + if (OM.hasValue()) + return OM.getValue(); + } + return None; + } + + const OperandMatcher &getOperand(StringRef SymbolicName) const { + Optional<const OperandMatcher *>OM = getOptionalOperand(SymbolicName); + if (OM.hasValue()) + return *OM.getValue(); + llvm_unreachable("Failed to lookup operand"); + } + + unsigned getNumOperands() const { return Operands.size(); } + OperandVec::iterator operands_begin() { return Operands.begin(); } + OperandVec::iterator operands_end() { return Operands.end(); } + iterator_range<OperandVec::iterator> operands() { + return make_range(operands_begin(), operands_end()); + } + OperandVec::const_iterator operands_begin() const { return Operands.begin(); } + OperandVec::const_iterator operands_end() const { return Operands.end(); } + iterator_range<OperandVec::const_iterator> operands() const { + return make_range(operands_begin(), operands_end()); + } + + /// Emit C++ statements to check the shape of the match and capture + /// instructions into local variables. + void emitCxxCaptureStmts(raw_ostream &OS, RuleMatcher &Rule, StringRef Expr) { + OS << "if (" << Expr << ".getNumOperands() < " << getNumOperands() << ")\n" + << " return false;\n"; + for (const auto &Operand : Operands) { + Operand->emitCxxCaptureStmts(OS, Rule, Operand->getOperandExpr(Expr)); + } + } + + /// Emit a C++ expression that tests whether the instruction named in + /// InsnVarName matches all the predicates and all the operands. 
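As a rough illustration of how the matcher classes above are meant to compose, consider the following hypothetical fragment; it is not part of the emitter, and CGI and RC stand in for a CodeGenInstruction and CodeGenRegisterClass the importer has already looked up:

```cpp
// Hypothetical composition sketch: match an instruction by opcode, then
// constrain one of its operands by LLT type and by register bank.
InstructionMatcher InsnMatcher;
InsnMatcher.addPredicate<InstructionOpcodeMatcher>(&CGI);
OperandMatcher &OM = InsnMatcher.addOperand(/*OpIdx=*/1, "src1",
                                            /*AllocatedTemporariesBaseID=*/0);
if (auto OpTy = MVTToLLT(MVT::i32))
  OM.addPredicate<LLTOperandMatcher>(*OpTy);       // operand must be s32
OM.addPredicate<RegisterBankOperandMatcher>(RC);   // and live in RC's bank
```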
+ void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef InsnVarName) const { + emitCxxPredicateListExpr(OS, Rule, InsnVarName); + for (const auto &Operand : Operands) { + OS << " &&\n("; + Operand->emitCxxPredicateExpr(OS, Rule, InsnVarName); + OS << ")"; + } + } + + /// Compare the priority of this object and B. + /// + /// Returns true if this object is more important than B. + bool isHigherPriorityThan(const InstructionMatcher &B) const { + // Instruction matchers involving more operands have higher priority. + if (Operands.size() > B.Operands.size()) + return true; + if (Operands.size() < B.Operands.size()) + return false; + + for (const auto &Predicate : zip(predicates(), B.predicates())) { + if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate))) + return true; + if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate))) + return false; + } + + for (const auto &Operand : zip(Operands, B.Operands)) { + if (std::get<0>(Operand)->isHigherPriorityThan(*std::get<1>(Operand))) + return true; + if (std::get<1>(Operand)->isHigherPriorityThan(*std::get<0>(Operand))) + return false; + } + + return false; + }; + + /// Report the maximum number of temporary operands needed by the instruction + /// matcher. + unsigned countTemporaryOperands() const { + return std::accumulate(predicates().begin(), predicates().end(), 0, + [](unsigned A, + const std::unique_ptr<InstructionPredicateMatcher> + &Predicate) { + return A + Predicate->countTemporaryOperands(); + }) + + std::accumulate( + Operands.begin(), Operands.end(), 0, + [](unsigned A, const std::unique_ptr<OperandMatcher> &Operand) { + return A + Operand->countTemporaryOperands(); + }); + } +}; + +/// Generates code to check that the operand is a register defined by an +/// instruction that matches the given instruction matcher. +/// +/// For example, the pattern: +/// (set $dst, (G_MUL (G_ADD $src1, $src2), $src3)) +/// would use an InstructionOperandMatcher for operand 1 of the G_MUL to match +/// the: +/// (G_ADD $src1, $src2) +/// subpattern. +class InstructionOperandMatcher : public OperandPredicateMatcher { +protected: + std::unique_ptr<InstructionMatcher> InsnMatcher; + +public: + InstructionOperandMatcher() + : OperandPredicateMatcher(OPM_Instruction), + InsnMatcher(new InstructionMatcher()) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_Instruction; + } + + InstructionMatcher &getInsnMatcher() const { return *InsnMatcher; } + + Optional<const OperandMatcher *> + getOptionalOperand(StringRef SymbolicName) const override { + assert(!SymbolicName.empty() && "Cannot lookup unnamed operand"); + return InsnMatcher->getOptionalOperand(SymbolicName); + } + + void emitCxxCaptureStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << "if (!" 
       << OperandExpr + ".isReg())\n"
+       << " return false;\n";
+    std::string InsnVarName = Rule.defineInsnVar(
+        OS, *InsnMatcher,
+        ("*MRI.getVRegDef(" + OperandExpr + ".getReg())").str());
+    InsnMatcher->emitCxxCaptureStmts(OS, Rule, InsnVarName);
+  }
+
+  void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule,
+                            StringRef OperandExpr) const override {
+    OperandExpr = Rule.getInsnVarName(*InsnMatcher);
+    OS << "(";
+    InsnMatcher->emitCxxPredicateExpr(OS, Rule, OperandExpr);
+    OS << ")\n";
+  }
+};
+
+//===- Actions ------------------------------------------------------------===//
+void OperandPlaceholder::emitCxxValueExpr(raw_ostream &OS) const {
+  switch (Kind) {
+  case OP_MatchReference:
+    OS << MatchReference.InsnMatcher->getOperand(MatchReference.SymbolicName)
+              .getOperandExpr(MatchReference.InsnVarName);
+    break;
+  case OP_Temporary:
+    OS << "TempOp" << Temporary.OpIdx;
+    break;
+  }
+}
+
+class OperandRenderer {
+public:
+  enum RendererKind { OR_Copy, OR_Imm, OR_Register, OR_ComplexPattern };
+
+protected:
+  RendererKind Kind;
+
+public:
+  OperandRenderer(RendererKind Kind) : Kind(Kind) {}
+  virtual ~OperandRenderer() {}
+
+  RendererKind getKind() const { return Kind; }
+
+  virtual void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const = 0;
 };
 
-class MatcherEmitter {
+/// A CopyRenderer emits code to copy a single operand from an existing
+/// instruction to the one being built.
+class CopyRenderer : public OperandRenderer {
+protected:
+  /// The matcher for the instruction that this operand is copied from.
+  /// This provides the facility for looking up an operand by its name so
+  /// that it can be used as a source for the instruction being built.
+  const InstructionMatcher &Matched;
+  /// The name of the operand.
+  const StringRef SymbolicName;
+
+public:
+  CopyRenderer(const InstructionMatcher &Matched, StringRef SymbolicName)
+      : OperandRenderer(OR_Copy), Matched(Matched), SymbolicName(SymbolicName) {
+  }
+
+  static bool classof(const OperandRenderer *R) {
+    return R->getKind() == OR_Copy;
+  }
+
+  const StringRef getSymbolicName() const { return SymbolicName; }
+
+  void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override {
+    const OperandMatcher &Operand = Matched.getOperand(SymbolicName);
+    StringRef InsnVarName =
+        Rule.getInsnVarName(Operand.getInstructionMatcher());
+    std::string OperandExpr = Operand.getOperandExpr(InsnVarName);
+    OS << " MIB.add(" << OperandExpr << "/*" << SymbolicName << "*/);\n";
+  }
+};
+
+/// Adds a specific physical register to the instruction being built.
+/// This is typically useful for WZR/XZR on AArch64.
+class AddRegisterRenderer : public OperandRenderer {
+protected:
+  const Record *RegisterDef;
+
+public:
+  AddRegisterRenderer(const Record *RegisterDef)
+      : OperandRenderer(OR_Register), RegisterDef(RegisterDef) {}
+
+  static bool classof(const OperandRenderer *R) {
+    return R->getKind() == OR_Register;
+  }
+
+  void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override {
+    OS << " MIB.addReg(" << RegisterDef->getValueAsString("Namespace")
+       << "::" << RegisterDef->getName() << ");\n";
+  }
+};
+
+/// Adds a specific immediate to the instruction being built.
+class ImmRenderer : public OperandRenderer { +protected: + int64_t Imm; + +public: + ImmRenderer(int64_t Imm) + : OperandRenderer(OR_Imm), Imm(Imm) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_Imm; + } + + void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override { + OS << " MIB.addImm(" << Imm << ");\n"; + } +}; + +class RenderComplexPatternOperand : public OperandRenderer { +private: + const Record &TheDef; + std::vector<OperandPlaceholder> Sources; + + unsigned getNumOperands() const { + return TheDef.getValueAsDag("Operands")->getNumArgs(); + } + +public: + RenderComplexPatternOperand(const Record &TheDef, + const ArrayRef<OperandPlaceholder> Sources) + : OperandRenderer(OR_ComplexPattern), TheDef(TheDef), Sources(Sources) {} + + static bool classof(const OperandRenderer *R) { + return R->getKind() == OR_ComplexPattern; + } + + void emitCxxRenderStmts(raw_ostream &OS, RuleMatcher &Rule) const override { + assert(Sources.size() == getNumOperands() && "Inconsistent number of operands"); + for (const auto &Source : Sources) { + OS << "MIB.add("; + Source.emitCxxValueExpr(OS); + OS << ");\n"; + } + } +}; + +/// An action taken when all Matcher predicates succeeded for a parent rule. +/// +/// Typical actions include: +/// * Changing the opcode of an instruction. +/// * Adding an operand to an instruction. +class MatchAction { +public: + virtual ~MatchAction() {} + + /// Emit the C++ statements to implement the action. + /// + /// \param RecycleVarName If given, it's an instruction to recycle. The + /// requirements on the instruction vary from action to + /// action. + virtual void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef RecycleVarName) const = 0; +}; + +/// Generates a comment describing the matched rule being acted upon. +class DebugCommentAction : public MatchAction { +private: const PatternToMatch &P; public: - std::vector<std::unique_ptr<Matcher>> Matchers; - std::vector<std::unique_ptr<MatchAction>> Actions; + DebugCommentAction(const PatternToMatch &P) : P(P) {} + + void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef RecycleVarName) const override { + OS << "// " << *P.getSrcPattern() << " => " << *P.getDstPattern() << "\n"; + } +}; - MatcherEmitter(const PatternToMatch &P) : P(P) {} +/// Generates code to build an instruction or mutate an existing instruction +/// into the desired instruction when this is possible. +class BuildMIAction : public MatchAction { +private: + const CodeGenInstruction *I; + const InstructionMatcher &Matched; + std::vector<std::unique_ptr<OperandRenderer>> OperandRenderers; + + /// True if the instruction can be built solely by mutating the opcode. + bool canMutate() const { + for (const auto &Renderer : enumerate(OperandRenderers)) { + if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) { + if (Matched.getOperand(Copy->getSymbolicName()).getOperandIndex() != + Renderer.index()) + return false; + } else + return false; + } - void emit(raw_ostream &OS) { - if (Matchers.empty()) - llvm_unreachable("Unexpected empty matcher!"); + return true; + } - OS << " // Src: " << *P.getSrcPattern() << "\n" - << " // Dst: " << *P.getDstPattern() << "\n"; +public: + BuildMIAction(const CodeGenInstruction *I, const InstructionMatcher &Matched) + : I(I), Matched(Matched) {} + + template <class Kind, class... Args> + Kind &addRenderer(Args&&... 
args) { + OperandRenderers.emplace_back( + llvm::make_unique<Kind>(std::forward<Args>(args)...)); + return *static_cast<Kind *>(OperandRenderers.back().get()); + } - OS << " if ((" << *Matchers.front() << ")"; - for (auto &MA : makeArrayRef(Matchers).drop_front()) - OS << " &&\n (" << *MA << ")"; - OS << ") {\n"; + void emitCxxActionStmts(raw_ostream &OS, RuleMatcher &Rule, + StringRef RecycleVarName) const override { + if (canMutate()) { + OS << " " << RecycleVarName << ".setDesc(TII.get(" << I->Namespace + << "::" << I->TheDef->getName() << "));\n"; - for (auto &MA : Actions) - OS << " " << *MA << "\n"; + if (!I->ImplicitDefs.empty() || !I->ImplicitUses.empty()) { + OS << " auto MIB = MachineInstrBuilder(MF, &" << RecycleVarName + << ");\n"; - OS << " constrainSelectedInstRegOperands(I, TII, TRI, RBI);\n"; - OS << " return true;\n"; - OS << " }\n"; + for (auto Def : I->ImplicitDefs) { + auto Namespace = Def->getValueAsString("Namespace"); + OS << " MIB.addDef(" << Namespace << "::" << Def->getName() + << ", RegState::Implicit);\n"; + } + for (auto Use : I->ImplicitUses) { + auto Namespace = Use->getValueAsString("Namespace"); + OS << " MIB.addUse(" << Namespace << "::" << Use->getName() + << ", RegState::Implicit);\n"; + } + } + + OS << " MachineInstr &NewI = " << RecycleVarName << ";\n"; + return; + } + + // TODO: Simple permutation looks like it could be almost as common as + // mutation due to commutative operations. + + OS << "MachineInstrBuilder MIB = BuildMI(*I.getParent(), I, " + "I.getDebugLoc(), TII.get(" + << I->Namespace << "::" << I->TheDef->getName() << "));\n"; + for (const auto &Renderer : OperandRenderers) + Renderer->emitCxxRenderStmts(OS, Rule); + OS << " for (const auto *FromMI : "; + Rule.emitCxxCapturedInsnList(OS); + OS << ")\n"; + OS << " for (const auto &MMO : FromMI->memoperands())\n"; + OS << " MIB.addMemOperand(MMO);\n"; + OS << " " << RecycleVarName << ".eraseFromParent();\n"; + OS << " MachineInstr &NewI = *MIB;\n"; } }; +InstructionMatcher &RuleMatcher::addInstructionMatcher() { + Matchers.emplace_back(new InstructionMatcher()); + return *Matchers.back(); +} + +template <class Kind, class... Args> +Kind &RuleMatcher::addAction(Args &&... args) { + Actions.emplace_back(llvm::make_unique<Kind>(std::forward<Args>(args)...)); + return *static_cast<Kind *>(Actions.back().get()); +} + +std::string RuleMatcher::defineInsnVar(raw_ostream &OS, + const InstructionMatcher &Matcher, + StringRef Value) { + std::string InsnVarName = "MI" + llvm::to_string(NextInsnVarID++); + OS << "MachineInstr &" << InsnVarName << " = " << Value << ";\n"; + InsnVariableNames[&Matcher] = InsnVarName; + return InsnVarName; +} + +StringRef RuleMatcher::getInsnVarName(const InstructionMatcher &InsnMatcher) const { + const auto &I = InsnVariableNames.find(&InsnMatcher); + if (I != InsnVariableNames.end()) + return I->second; + llvm_unreachable("Matched Insn was not captured in a local variable"); +} + +/// Emit a C++ initializer_list containing references to every matched instruction. +void RuleMatcher::emitCxxCapturedInsnList(raw_ostream &OS) { + SmallVector<StringRef, 2> Names; + for (const auto &Pair : InsnVariableNames) + Names.push_back(Pair.second); + std::sort(Names.begin(), Names.end()); + + OS << "{"; + for (const auto &Name : Names) + OS << "&" << Name << ", "; + OS << "}"; +} + +/// Emit C++ statements to check the shape of the match and capture +/// instructions into local variables. 
+void RuleMatcher::emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr) {
+  assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
+  std::string InsnVarName = defineInsnVar(OS, *Matchers.front(), Expr);
+  Matchers.front()->emitCxxCaptureStmts(OS, *this, InsnVarName);
+}
+
+void RuleMatcher::emit(raw_ostream &OS) {
+  if (Matchers.empty())
+    llvm_unreachable("Unexpected empty matcher!");
+
+  // The representation supports rules that require multiple roots such as:
+  //   %ptr(p0) = ...
+  //   %elt0(s32) = G_LOAD %ptr
+  //   %1(p0) = G_ADD %ptr, 4
+  //   %elt1(s32) = G_LOAD p0 %1
+  // which could be usefully folded into:
+  //   %ptr(p0) = ...
+  //   %elt0(s32), %elt1(s32) = TGT_LOAD_PAIR %ptr
+  // on some targets but we don't need to make use of that yet.
+  assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
+  OS << "if ([&]() {\n";
+
+  emitCxxCaptureStmts(OS, "I");
+
+  OS << " if (";
+  Matchers.front()->emitCxxPredicateExpr(OS, *this,
+                                         getInsnVarName(*Matchers.front()));
+  OS << ") {\n";
+
+  // We must also check if it's safe to fold the matched instructions.
+  if (InsnVariableNames.size() >= 2) {
+    for (const auto &Pair : InsnVariableNames) {
+      // Skip the root node since it isn't moving anywhere. Everything else is
+      // sinking to meet it.
+      if (Pair.first == Matchers.front().get())
+        continue;
+
+      // Reject the difficult cases until we have a more accurate check.
+      OS << " if (!isObviouslySafeToFold(" << Pair.second
+         << ")) return false;\n";
+
+      // FIXME: Emit checks to determine it's _actually_ safe to fold and/or
+      // account for unsafe cases.
+      //
+      // Example:
+      //   MI1--> %0 = ...
+      //          %1 = ... %0
+      //   MI0--> %2 = ... %0
+      //   It's not safe to erase MI1. We currently handle this by not
+      //   erasing %0 (even when it's dead).
+      //
+      // Example:
+      //   MI1--> %0 = load volatile @a
+      //          %1 = load volatile @a
+      //   MI0--> %2 = ... %0
+      //   It's not safe to sink %0's def past %1. We currently handle
+      //   this by rejecting all loads.
+      //
+      // Example:
+      //   MI1--> %0 = load @a
+      //          %1 = store @a
+      //   MI0--> %2 = ... %0
+      //   It's not safe to sink %0's def past %1. We currently handle
+      //   this by rejecting all loads.
+      //
+      // Example:
+      //          G_CONDBR %cond, @BB1
+      //        BB0:
+      //   MI1--> %0 = load @a
+      //          G_BR @BB1
+      //        BB1:
+      //   MI0--> %2 = ... %0
+      //   It's not always safe to sink %0 across control flow. In this
+      //   case it may introduce a memory fault. We currently handle this
+      //   by rejecting all loads.
+    }
+  }
+
+  for (const auto &MA : Actions) {
+    MA->emitCxxActionStmts(OS, *this, "I");
+  }
+
+  OS << " constrainSelectedInstRegOperands(NewI, TII, TRI, RBI);\n";
+  OS << " return true;\n";
+  OS << " }\n";
+  OS << " return false;\n";
+  OS << " }()) { return true; }\n\n";
+}
+
+bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
+  // Rules involving more match roots have higher priority.
+ if (Matchers.size() > B.Matchers.size()) + return true; + if (Matchers.size() < B.Matchers.size()) + return false; + + for (const auto &Matcher : zip(Matchers, B.Matchers)) { + if (std::get<0>(Matcher)->isHigherPriorityThan(*std::get<1>(Matcher))) + return true; + if (std::get<1>(Matcher)->isHigherPriorityThan(*std::get<0>(Matcher))) + return false; + } + + return false; +} + +unsigned RuleMatcher::countTemporaryOperands() const { + return std::accumulate( + Matchers.begin(), Matchers.end(), 0, + [](unsigned A, const std::unique_ptr<InstructionMatcher> &Matcher) { + return A + Matcher->countTemporaryOperands(); + }); +} + //===- GlobalISelEmitter class --------------------------------------------===// +class GlobalISelEmitter { +public: + explicit GlobalISelEmitter(RecordKeeper &RK); + void run(raw_ostream &OS); + +private: + const RecordKeeper &RK; + const CodeGenDAGPatterns CGP; + const CodeGenTarget &Target; + + /// Keep track of the equivalence between SDNodes and Instruction. + /// This is defined using 'GINodeEquiv' in the target description. + DenseMap<Record *, const CodeGenInstruction *> NodeEquivs; + + /// Keep track of the equivalence between ComplexPattern's and + /// GIComplexOperandMatcher. Map entries are specified by subclassing + /// GIComplexPatternEquiv. + DenseMap<const Record *, const Record *> ComplexPatternEquivs; + + void gatherNodeEquivs(); + const CodeGenInstruction *findNodeEquiv(Record *N) const; + + Error importRulePredicates(RuleMatcher &M, ArrayRef<Init *> Predicates) const; + Expected<InstructionMatcher &> + createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, + const TreePatternNode *Src) const; + Error importChildMatcher(InstructionMatcher &InsnMatcher, + TreePatternNode *SrcChild, unsigned OpIdx, + unsigned &TempOpIdx) const; + Expected<BuildMIAction &> createAndImportInstructionRenderer( + RuleMatcher &M, const TreePatternNode *Dst, + const InstructionMatcher &InsnMatcher) const; + Error importExplicitUseRenderer(BuildMIAction &DstMIBuilder, + TreePatternNode *DstChild, + const InstructionMatcher &InsnMatcher) const; + Error + importImplicitDefRenderers(BuildMIAction &DstMIBuilder, + const std::vector<Record *> &ImplicitDefs) const; + + /// Analyze pattern \p P, returning a matcher for it if possible. + /// Otherwise, return an Error explaining why we don't support it. 
+ Expected<RuleMatcher> runOnPattern(const PatternToMatch &P); +}; + void GlobalISelEmitter::gatherNodeEquivs() { assert(NodeEquivs.empty()); for (Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv")) NodeEquivs[Equiv->getValueAsDef("Node")] = &Target.getInstruction(Equiv->getValueAsDef("I")); + + assert(ComplexPatternEquivs.empty()); + for (Record *Equiv : RK.getAllDerivedDefinitions("GIComplexPatternEquiv")) { + Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent"); + if (!SelDAGEquiv) + continue; + ComplexPatternEquivs[SelDAGEquiv] = Equiv; + } } -const CodeGenInstruction *GlobalISelEmitter::findNodeEquiv(Record *N) { +const CodeGenInstruction *GlobalISelEmitter::findNodeEquiv(Record *N) const { return NodeEquivs.lookup(N); } @@ -231,126 +1313,373 @@ GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK) //===- Emitter ------------------------------------------------------------===// -Optional<GlobalISelEmitter::SkipReason> -GlobalISelEmitter::runOnPattern(const PatternToMatch &P, raw_ostream &OS) { +Error +GlobalISelEmitter::importRulePredicates(RuleMatcher &M, + ArrayRef<Init *> Predicates) const { + if (!Predicates.empty()) + return failedImport("Pattern has a rule predicate (" + + explainRulePredicates(Predicates) + ")"); + return Error::success(); +} - // Keep track of the matchers and actions to emit. - MatcherEmitter M(P); +Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher( + InstructionMatcher &InsnMatcher, const TreePatternNode *Src) const { + // Start with the defined operands (i.e., the results of the root operator). + if (Src->getExtTypes().size() > 1) + return failedImport("Src pattern has multiple results"); + + auto SrcGIOrNull = findNodeEquiv(Src->getOperator()); + if (!SrcGIOrNull) + return failedImport("Pattern operator lacks an equivalent Instruction" + + explainOperator(Src->getOperator())); + auto &SrcGI = *SrcGIOrNull; - // First, analyze the whole pattern. - // If the entire pattern has a predicate (e.g., target features), ignore it. - if (!P.getPredicates()->getValues().empty()) - return SkipReason{"Pattern has a predicate"}; + // The operators look good: match the opcode and mutate it to the new one. + InsnMatcher.addPredicate<InstructionOpcodeMatcher>(&SrcGI); - // Physreg imp-defs require additional logic. Ignore the pattern. - if (!P.getDstRegs().empty()) - return SkipReason{"Pattern defines a physical register"}; + unsigned OpIdx = 0; + unsigned TempOpIdx = 0; + for (const EEVT::TypeSet &Ty : Src->getExtTypes()) { + auto OpTyOrNone = MVTToLLT(Ty.getConcrete()); - // Next, analyze the pattern operators. - TreePatternNode *Src = P.getSrcPattern(); - TreePatternNode *Dst = P.getDstPattern(); + if (!OpTyOrNone) + return failedImport( + "Result of Src pattern operator has an unsupported type"); - // If the root of either pattern isn't a simple operator, ignore it. - if (!isTrivialOperatorNode(Dst)) - return SkipReason{"Dst pattern root isn't a trivial operator"}; - if (!isTrivialOperatorNode(Src)) - return SkipReason{"Src pattern root isn't a trivial operator"}; + // Results don't have a name unless they are the root node. The caller will + // set the name if appropriate. + OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); + OM.addPredicate<LLTOperandMatcher>(*OpTyOrNone); + } - Record *DstOp = Dst->getOperator(); - if (!DstOp->isSubClassOf("Instruction")) - return SkipReason{"Pattern operator isn't an instruction"}; + // Match the used operands (i.e. the children of the operator). 
+ for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { + if (auto Error = importChildMatcher(InsnMatcher, Src->getChild(i), OpIdx++, + TempOpIdx)) + return std::move(Error); + } - auto &DstI = Target.getInstruction(DstOp); + return InsnMatcher; +} - auto SrcGIOrNull = findNodeEquiv(Src->getOperator()); - if (!SrcGIOrNull) - return SkipReason{"Pattern operator lacks an equivalent Instruction"}; - auto &SrcGI = *SrcGIOrNull; +Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, + TreePatternNode *SrcChild, + unsigned OpIdx, + unsigned &TempOpIdx) const { + OperandMatcher &OM = + InsnMatcher.addOperand(OpIdx, SrcChild->getName(), TempOpIdx); + + if (SrcChild->hasAnyPredicate()) + return failedImport("Src pattern child has predicate (" + + explainPredicates(SrcChild) + ")"); + + ArrayRef<EEVT::TypeSet> ChildTypes = SrcChild->getExtTypes(); + if (ChildTypes.size() != 1) + return failedImport("Src pattern child has multiple results"); + + // Check MBB's before the type check since they are not a known type. + if (!SrcChild->isLeaf()) { + if (SrcChild->getOperator()->isSubClassOf("SDNode")) { + auto &ChildSDNI = CGP.getSDNodeInfo(SrcChild->getOperator()); + if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") { + OM.addPredicate<MBBOperandMatcher>(); + return Error::success(); + } + } + } - // The operators look good: match the opcode and mutate it to the new one. - M.Matchers.emplace_back(new MatchOpcode(&SrcGI)); - M.Actions.emplace_back(new MutateOpcode(&DstI)); + auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete()); + if (!OpTyOrNone) + return failedImport("Src operand has an unsupported type"); + OM.addPredicate<LLTOperandMatcher>(*OpTyOrNone); + + // Check for nested instructions. + if (!SrcChild->isLeaf()) { + // Map the node to a gMIR instruction. + InstructionOperandMatcher &InsnOperand = + OM.addPredicate<InstructionOperandMatcher>(); + auto InsnMatcherOrError = + createAndImportSelDAGMatcher(InsnOperand.getInsnMatcher(), SrcChild); + if (auto Error = InsnMatcherOrError.takeError()) + return Error; + + return Error::success(); + } - // Next, analyze the children, only accepting patterns that don't require - // any change to operands. - if (Src->getNumChildren() != Dst->getNumChildren()) - return SkipReason{"Src/dst patterns have a different # of children"}; + // Check for constant immediates. + if (auto *ChildInt = dyn_cast<IntInit>(SrcChild->getLeafValue())) { + OM.addPredicate<IntOperandMatcher>(ChildInt->getValue()); + return Error::success(); + } - unsigned OpIdx = 0; + // Check for def's like register classes or ComplexPattern's. + if (auto *ChildDefInit = dyn_cast<DefInit>(SrcChild->getLeafValue())) { + auto *ChildRec = ChildDefInit->getDef(); - // Start with the defined operands (i.e., the results of the root operator). - if (DstI.Operands.NumDefs != Src->getExtTypes().size()) - return SkipReason{"Src pattern results and dst MI defs are different"}; + // Check for register classes. + if (ChildRec->isSubClassOf("RegisterClass")) { + OM.addPredicate<RegisterBankOperandMatcher>( + Target.getRegisterClass(ChildRec)); + return Error::success(); + } - for (const EEVT::TypeSet &Ty : Src->getExtTypes()) { - Record *DstIOpRec = DstI.Operands[OpIdx].Rec; - if (!DstIOpRec->isSubClassOf("RegisterClass")) - return SkipReason{"Dst MI def isn't a register class"}; + // Check for ComplexPattern's. 
+ if (ChildRec->isSubClassOf("ComplexPattern")) { + const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec); + if (ComplexPattern == ComplexPatternEquivs.end()) + return failedImport("SelectionDAG ComplexPattern (" + + ChildRec->getName() + ") not mapped to GlobalISel"); + + const auto &Predicate = OM.addPredicate<ComplexPatternOperandMatcher>( + OM, *ComplexPattern->second); + TempOpIdx += Predicate.countTemporaryOperands(); + return Error::success(); + } - auto OpTyOrNone = MVTToLLT(Ty.getConcrete()); - if (!OpTyOrNone) - return SkipReason{"Dst operand has an unsupported type"}; + if (ChildRec->isSubClassOf("ImmLeaf")) { + return failedImport( + "Src pattern child def is an unsupported tablegen class (ImmLeaf)"); + } - M.Matchers.emplace_back(new MatchRegOpType(OpIdx, *OpTyOrNone)); - M.Matchers.emplace_back( - new MatchRegOpBank(OpIdx, Target.getRegisterClass(DstIOpRec))); - ++OpIdx; + return failedImport( + "Src pattern child def is an unsupported tablegen class"); } - // Finally match the used operands (i.e., the children of the root operator). - for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { - auto *SrcChild = Src->getChild(i); - auto *DstChild = Dst->getChild(i); - - // Patterns can reorder operands. Ignore those for now. - if (SrcChild->getName() != DstChild->getName()) - return SkipReason{"Src/dst pattern children not in same order"}; - - // The only non-leaf child we accept is 'bb': it's an operator because - // BasicBlockSDNode isn't inline, but in MI it's just another operand. - if (!SrcChild->isLeaf()) { - if (DstChild->isLeaf() || - SrcChild->getOperator() != DstChild->getOperator()) - return SkipReason{"Src/dst pattern child operator mismatch"}; - - if (SrcChild->getOperator()->isSubClassOf("SDNode")) { - auto &ChildSDNI = CGP.getSDNodeInfo(SrcChild->getOperator()); - if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") { - M.Matchers.emplace_back(new MatchMBBOp(OpIdx++)); - continue; - } + return failedImport("Src pattern child is an unsupported kind"); +} + +Error GlobalISelEmitter::importExplicitUseRenderer( + BuildMIAction &DstMIBuilder, TreePatternNode *DstChild, + const InstructionMatcher &InsnMatcher) const { + // The only non-leaf child we accept is 'bb': it's an operator because + // BasicBlockSDNode isn't inline, but in MI it's just another operand. + if (!DstChild->isLeaf()) { + if (DstChild->getOperator()->isSubClassOf("SDNode")) { + auto &ChildSDNI = CGP.getSDNodeInfo(DstChild->getOperator()); + if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") { + DstMIBuilder.addRenderer<CopyRenderer>(InsnMatcher, + DstChild->getName()); + return Error::success(); } - return SkipReason{"Src pattern child isn't a leaf node"}; } + return failedImport("Dst pattern child isn't a leaf node or an MBB"); + } - if (SrcChild->getLeafValue() != DstChild->getLeafValue()) - return SkipReason{"Src/dst pattern child leaf mismatch"}; + // Otherwise, we're looking for a bog-standard RegisterClass operand. + if (DstChild->hasAnyPredicate()) + return failedImport("Dst pattern child has predicate (" + + explainPredicates(DstChild) + ")"); - // Otherwise, we're looking for a bog-standard RegisterClass operand. 
- if (SrcChild->hasAnyPredicate()) - return SkipReason{"Src pattern child has predicate"}; - auto *ChildRec = cast<DefInit>(SrcChild->getLeafValue())->getDef(); - if (!ChildRec->isSubClassOf("RegisterClass")) - return SkipReason{"Src pattern child isn't a RegisterClass"}; + if (auto *ChildDefInit = dyn_cast<DefInit>(DstChild->getLeafValue())) { + auto *ChildRec = ChildDefInit->getDef(); - ArrayRef<EEVT::TypeSet> ChildTypes = SrcChild->getExtTypes(); + ArrayRef<EEVT::TypeSet> ChildTypes = DstChild->getExtTypes(); if (ChildTypes.size() != 1) - return SkipReason{"Src pattern child has multiple results"}; + return failedImport("Dst pattern child has multiple results"); auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete()); if (!OpTyOrNone) - return SkipReason{"Src operand has an unsupported type"}; + return failedImport("Dst operand has an unsupported type"); + + if (ChildRec->isSubClassOf("Register")) { + DstMIBuilder.addRenderer<AddRegisterRenderer>(ChildRec); + return Error::success(); + } + + if (ChildRec->isSubClassOf("RegisterClass")) { + DstMIBuilder.addRenderer<CopyRenderer>(InsnMatcher, DstChild->getName()); + return Error::success(); + } + + if (ChildRec->isSubClassOf("ComplexPattern")) { + const auto &ComplexPattern = ComplexPatternEquivs.find(ChildRec); + if (ComplexPattern == ComplexPatternEquivs.end()) + return failedImport( + "SelectionDAG ComplexPattern not mapped to GlobalISel"); + + SmallVector<OperandPlaceholder, 2> RenderedOperands; + const OperandMatcher &OM = InsnMatcher.getOperand(DstChild->getName()); + for (unsigned I = 0; I < OM.countTemporaryOperands(); ++I) + RenderedOperands.push_back(OperandPlaceholder::CreateTemporary( + OM.getAllocatedTemporariesBaseID() + I)); + DstMIBuilder.addRenderer<RenderComplexPatternOperand>( + *ComplexPattern->second, RenderedOperands); + return Error::success(); + } - M.Matchers.emplace_back(new MatchRegOpType(OpIdx, *OpTyOrNone)); - M.Matchers.emplace_back( - new MatchRegOpBank(OpIdx, Target.getRegisterClass(ChildRec))); + if (ChildRec->isSubClassOf("SDNodeXForm")) + return failedImport("Dst pattern child def is an unsupported tablegen " + "class (SDNodeXForm)"); + + return failedImport( + "Dst pattern child def is an unsupported tablegen class"); + } + + return failedImport("Dst pattern child is an unsupported kind"); +} + +Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer( + RuleMatcher &M, const TreePatternNode *Dst, + const InstructionMatcher &InsnMatcher) const { + Record *DstOp = Dst->getOperator(); + if (!DstOp->isSubClassOf("Instruction")) { + if (DstOp->isSubClassOf("ValueType")) + return failedImport( + "Pattern operator isn't an instruction (it's a ValueType)"); + return failedImport("Pattern operator isn't an instruction"); + } + auto &DstI = Target.getInstruction(DstOp); + + auto &DstMIBuilder = M.addAction<BuildMIAction>(&DstI, InsnMatcher); + + // Render the explicit defs. + for (unsigned I = 0; I < DstI.Operands.NumDefs; ++I) { + const auto &DstIOperand = DstI.Operands[I]; + DstMIBuilder.addRenderer<CopyRenderer>(InsnMatcher, DstIOperand.Name); + } + + // Figure out which operands need defaults inserted. Operands that subclass + // OperandWithDefaultOps are considered from left to right until we have + // enough operands to render the instruction. 
+ SmallSet<unsigned, 2> DefaultOperands; + unsigned DstINumUses = DstI.Operands.size() - DstI.Operands.NumDefs; + unsigned NumDefaultOperands = 0; + for (unsigned I = 0; I < DstINumUses && + DstINumUses > Dst->getNumChildren() + NumDefaultOperands; + ++I) { + const auto &DstIOperand = DstI.Operands[DstI.Operands.NumDefs + I]; + if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) { + DefaultOperands.insert(I); + NumDefaultOperands += + DstIOperand.Rec->getValueAsDag("DefaultOps")->getNumArgs(); + } + } + if (DstINumUses > Dst->getNumChildren() + DefaultOperands.size()) + return failedImport("Insufficient operands supplied and default ops " + "couldn't make up the shortfall"); + if (DstINumUses < Dst->getNumChildren() + DefaultOperands.size()) + return failedImport("Too many operands supplied"); + + // Render the explicit uses. + unsigned Child = 0; + for (unsigned I = 0; I != DstINumUses; ++I) { + // If we need to insert default ops here, then do so. + if (DefaultOperands.count(I)) { + const auto &DstIOperand = DstI.Operands[DstI.Operands.NumDefs + I]; + + DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps"); + for (const auto *DefaultOp : DefaultOps->args()) { + // Look through ValueType operators. + if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) { + if (const DefInit *DefaultDagOperator = + dyn_cast<DefInit>(DefaultDagOp->getOperator())) { + if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) + DefaultOp = DefaultDagOp->getArg(0); + } + } + + if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) { + DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef()); + continue; + } + + if (const IntInit *DefaultIntOp = dyn_cast<IntInit>(DefaultOp)) { + DstMIBuilder.addRenderer<ImmRenderer>(DefaultIntOp->getValue()); + continue; + } + + return failedImport("Could not add default op"); + } + + continue; + } + + if (auto Error = importExplicitUseRenderer( + DstMIBuilder, Dst->getChild(Child), InsnMatcher)) + return std::move(Error); + ++Child; + } + + return DstMIBuilder; +} + +Error GlobalISelEmitter::importImplicitDefRenderers( + BuildMIAction &DstMIBuilder, + const std::vector<Record *> &ImplicitDefs) const { + if (!ImplicitDefs.empty()) + return failedImport("Pattern defines a physical register"); + return Error::success(); +} + +Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { + // Keep track of the matchers and actions to emit. + RuleMatcher M; + M.addAction<DebugCommentAction>(P); + + if (auto Error = importRulePredicates(M, P.getPredicates()->getValues())) + return std::move(Error); + + // Next, analyze the pattern operators. + TreePatternNode *Src = P.getSrcPattern(); + TreePatternNode *Dst = P.getDstPattern(); + + // If the root of either pattern isn't a simple operator, ignore it. + if (auto Err = isTrivialOperatorNode(Dst)) + return failedImport("Dst pattern root isn't a trivial operator (" + + toString(std::move(Err)) + ")"); + if (auto Err = isTrivialOperatorNode(Src)) + return failedImport("Src pattern root isn't a trivial operator (" + + toString(std::move(Err)) + ")"); + + // Start with the defined operands (i.e., the results of the root operator). 
+ Record *DstOp = Dst->getOperator(); + if (!DstOp->isSubClassOf("Instruction")) + return failedImport("Pattern operator isn't an instruction"); + + auto &DstI = Target.getInstruction(DstOp); + if (DstI.Operands.NumDefs != Src->getExtTypes().size()) + return failedImport("Src pattern results and dst MI defs are different (" + + to_string(Src->getExtTypes().size()) + " def(s) vs " + + to_string(DstI.Operands.NumDefs) + " def(s))"); + + InstructionMatcher &InsnMatcherTemp = M.addInstructionMatcher(); + auto InsnMatcherOrError = createAndImportSelDAGMatcher(InsnMatcherTemp, Src); + if (auto Error = InsnMatcherOrError.takeError()) + return std::move(Error); + InstructionMatcher &InsnMatcher = InsnMatcherOrError.get(); + + // The root of the match also has constraints on the register bank so that it + // matches the result instruction. + unsigned OpIdx = 0; + for (const EEVT::TypeSet &Ty : Src->getExtTypes()) { + (void)Ty; + + const auto &DstIOperand = DstI.Operands[OpIdx]; + Record *DstIOpRec = DstIOperand.Rec; + if (!DstIOpRec->isSubClassOf("RegisterClass")) + return failedImport("Dst MI def isn't a register class"); + + OperandMatcher &OM = InsnMatcher.getOperand(OpIdx); + OM.setSymbolicName(DstIOperand.Name); + OM.addPredicate<RegisterBankOperandMatcher>( + Target.getRegisterClass(DstIOpRec)); ++OpIdx; } - // We're done with this pattern! Emit the processed result. - M.emit(OS); - ++NumPatternEmitted; - return None; + auto DstMIBuilderOrError = + createAndImportInstructionRenderer(M, Dst, InsnMatcher); + if (auto Error = DstMIBuilderOrError.takeError()) + return std::move(Error); + BuildMIAction &DstMIBuilder = DstMIBuilderOrError.get(); + + // Render the implicit defs. + // These are only added to the root of the result. + if (auto Error = importImplicitDefRenderers(DstMIBuilder, P.getDstRegs())) + return std::move(Error); + + // We're done with this pattern! It's eligible for GISel emission; return it. + ++NumPatternImported; + return std::move(M); } void GlobalISelEmitter::run(raw_ostream &OS) { @@ -359,26 +1688,71 @@ void GlobalISelEmitter::run(raw_ostream &OS) { emitSourceFileHeader(("Global Instruction Selector for the " + Target.getName() + " target").str(), OS); - OS << "bool " << Target.getName() - << "InstructionSelector::selectImpl" - "(MachineInstr &I) const {\n const MachineRegisterInfo &MRI = " - "I.getParent()->getParent()->getRegInfo();\n"; - + std::vector<RuleMatcher> Rules; // Look through the SelectionDAG patterns we found, possibly emitting some. for (const PatternToMatch &Pat : CGP.ptms()) { ++NumPatternTotal; - if (auto SkipReason = runOnPattern(Pat, OS)) { + auto MatcherOrErr = runOnPattern(Pat); + + // The pattern analysis can fail, indicating an unsupported pattern. + // Report that if we've been asked to do so. 
+ if (auto Err = MatcherOrErr.takeError()) { if (WarnOnSkippedPatterns) { PrintWarning(Pat.getSrcRecord()->getLoc(), - "Skipped pattern: " + SkipReason->Reason); + "Skipped pattern: " + toString(std::move(Err))); + } else { + consumeError(std::move(Err)); } - ++NumPatternSkipped; + ++NumPatternImportsSkipped; + continue; } + + Rules.push_back(std::move(MatcherOrErr.get())); + } + + std::stable_sort(Rules.begin(), Rules.end(), + [&](const RuleMatcher &A, const RuleMatcher &B) { + if (A.isHigherPriorityThan(B)) { + assert(!B.isHigherPriorityThan(A) && "Cannot be more important " + "and less important at " + "the same time"); + return true; + } + return false; + }); + + unsigned MaxTemporaries = 0; + for (const auto &Rule : Rules) + MaxTemporaries = std::max(MaxTemporaries, Rule.countTemporaryOperands()); + + OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n"; + for (unsigned I = 0; I < MaxTemporaries; ++I) + OS << " mutable MachineOperand TempOp" << I << ";\n"; + OS << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n"; + + OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n"; + for (unsigned I = 0; I < MaxTemporaries; ++I) + OS << ", TempOp" << I << "(MachineOperand::CreatePlaceholder())\n"; + OS << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n"; + + OS << "#ifdef GET_GLOBALISEL_IMPL\n" + << "bool " << Target.getName() + << "InstructionSelector::selectImpl(MachineInstr &I) const {\n" + << " MachineFunction &MF = *I.getParent()->getParent();\n" + << " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"; + + for (auto &Rule : Rules) { + Rule.emit(OS); + ++NumPatternEmitted; } - OS << " return false;\n}\n"; + OS << " return false;\n" + << "}\n" + << "#endif // ifdef GET_GLOBALISEL_IMPL\n"; } +} // end anonymous namespace + //===----------------------------------------------------------------------===// namespace llvm { diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 33256ccba46c2..e9dd2fa0aca00 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -133,14 +133,14 @@ void IntrinsicEmitter::EmitTargetInfo(const CodeGenIntrinsicTable &Ints, OS << "// Target mapping\n"; OS << "#ifdef GET_INTRINSIC_TARGET_DATA\n"; OS << "struct IntrinsicTargetInfo {\n" - << " StringRef Name;\n" + << " llvm::StringLiteral Name;\n" << " size_t Offset;\n" << " size_t Count;\n" << "};\n"; - OS << "static const IntrinsicTargetInfo TargetInfos[] = {\n"; + OS << "static constexpr IntrinsicTargetInfo TargetInfos[] = {\n"; for (auto Target : Ints.Targets) - OS << " {\"" << Target.Name << "\", " << Target.Offset << ", " - << Target.Count << "},\n"; + OS << " {llvm::StringLiteral(\"" << Target.Name << "\"), " << Target.Offset + << ", " << Target.Count << "},\n"; OS << "};\n"; OS << "#endif\n\n"; } @@ -497,10 +497,10 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << "// Add parameter attributes that are not common to all intrinsics.\n"; OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n"; if (TargetOnly) - OS << "static AttributeSet getAttributes(LLVMContext &C, " << TargetPrefix + OS << "static AttributeList getAttributes(LLVMContext &C, " << TargetPrefix << "Intrinsic::ID id) {\n"; else - OS << "AttributeSet Intrinsic::getAttributes(LLVMContext &C, ID id) {\n"; + OS << "AttributeList Intrinsic::getAttributes(LLVMContext &C, ID id) {\n"; // Compute the maximum number of attribute arguments and the map typedef std::map<const CodeGenIntrinsic*, unsigned, @@ -518,7 +518,7 @@ void IntrinsicEmitter::EmitAttributes(const 
CodeGenIntrinsicTable &Ints, N = ++AttrNum; } - // Emit an array of AttributeSet. Most intrinsics will have at least one + // Emit an array of AttributeList. Most intrinsics will have at least one // entry, for the function itself (index ~1), which is usually nounwind. OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n"; @@ -530,7 +530,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, } OS << " };\n\n"; - OS << " AttributeSet AS[" << maxArgAttrs+1 << "];\n"; + OS << " AttributeList AS[" << maxArgAttrs + 1 << "];\n"; OS << " unsigned NumAttrs = 0;\n"; OS << " if (id != 0) {\n"; OS << " switch(IntrinsicsToAttributesMap[id - "; @@ -595,8 +595,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, ++ai; } while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo); OS << "};\n"; - OS << " AS[" << numAttrs++ << "] = AttributeSet::get(C, " - << argNo+1 << ", AttrParam" << argNo +1 << ");\n"; + OS << " AS[" << numAttrs++ << "] = AttributeList::get(C, " + << argNo + 1 << ", AttrParam" << argNo + 1 << ");\n"; } } @@ -699,8 +699,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, break; } OS << "};\n"; - OS << " AS[" << numAttrs++ << "] = AttributeSet::get(C, " - << "AttributeSet::FunctionIndex, Atts);\n"; + OS << " AS[" << numAttrs++ << "] = AttributeList::get(C, " + << "AttributeList::FunctionIndex, Atts);\n"; } if (numAttrs) { @@ -708,14 +708,14 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << " break;\n"; OS << " }\n"; } else { - OS << " return AttributeSet();\n"; + OS << " return AttributeList();\n"; OS << " }\n"; } } OS << " }\n"; OS << " }\n"; - OS << " return AttributeSet::get(C, makeArrayRef(AS, NumAttrs));\n"; + OS << " return AttributeList::get(C, makeArrayRef(AS, NumAttrs));\n"; OS << "}\n"; OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n"; } diff --git a/utils/TableGen/RegisterBankEmitter.cpp b/utils/TableGen/RegisterBankEmitter.cpp new file mode 100644 index 0000000000000..bf066412b2860 --- /dev/null +++ b/utils/TableGen/RegisterBankEmitter.cpp @@ -0,0 +1,320 @@ +//===- RegisterBankEmitter.cpp - Generate a Register Bank Desc. -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend is responsible for emitting a description of a target +// register bank for a code generator. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" + +#include "CodeGenRegisters.h" + +#define DEBUG_TYPE "register-bank-emitter" + +using namespace llvm; + +namespace { +class RegisterBank { + + /// A vector of register classes that are included in the register bank. + typedef std::vector<const CodeGenRegisterClass *> RegisterClassesTy; + +private: + const Record &TheDef; + + /// The register classes that are covered by the register bank. + RegisterClassesTy RCs; + + /// The register class with the largest register size. + const CodeGenRegisterClass *RCWithLargestRegsSize; + +public: + RegisterBank(const Record &TheDef) + : TheDef(TheDef), RCs(), RCWithLargestRegsSize(nullptr) {} + + /// Get the human-readable name for the bank. 
+ std::string getName() const { return TheDef.getValueAsString("Name"); } + /// Get the name of the enumerator in the ID enumeration. + std::string getEnumeratorName() const { return (TheDef.getName() + "ID").str(); } + + /// Get the name of the array holding the register class coverage data; + std::string getCoverageArrayName() const { + return (TheDef.getName() + "CoverageData").str(); + } + + /// Get the name of the global instance variable. + StringRef getInstanceVarName() const { return TheDef.getName(); } + + const Record &getDef() const { return TheDef; } + + /// Get the register classes listed in the RegisterBank.RegisterClasses field. + std::vector<const CodeGenRegisterClass *> + getExplictlySpecifiedRegisterClasses( + CodeGenRegBank &RegisterClassHierarchy) const { + std::vector<const CodeGenRegisterClass *> RCs; + for (const auto &RCDef : getDef().getValueAsListOfDefs("RegisterClasses")) + RCs.push_back(RegisterClassHierarchy.getRegClass(RCDef)); + return RCs; + } + + /// Add a register class to the bank without duplicates. + void addRegisterClass(const CodeGenRegisterClass *RC) { + if (std::find_if(RCs.begin(), RCs.end(), + [&RC](const CodeGenRegisterClass *X) { + return X == RC; + }) != RCs.end()) + return; + + // FIXME? We really want the register size rather than the spill size + // since the spill size may be bigger on some targets with + // limited load/store instructions. However, we don't store the + // register size anywhere (we could sum the sizes of the subregisters + // but there may be additional bits too) and we can't derive it from + // the VT's reliably due to Untyped. + if (RCWithLargestRegsSize == nullptr) + RCWithLargestRegsSize = RC; + else if (RCWithLargestRegsSize->SpillSize < RC->SpillSize) + RCWithLargestRegsSize = RC; + assert(RCWithLargestRegsSize && "RC was nullptr?"); + + RCs.emplace_back(RC); + } + + const CodeGenRegisterClass *getRCWithLargestRegsSize() const { + return RCWithLargestRegsSize; + } + + iterator_range<typename RegisterClassesTy::const_iterator> + register_classes() const { + return llvm::make_range(RCs.begin(), RCs.end()); + } +}; + +class RegisterBankEmitter { +private: + RecordKeeper &Records; + CodeGenRegBank RegisterClassHierarchy; + + void emitHeader(raw_ostream &OS, const StringRef TargetName, + const std::vector<RegisterBank> &Banks); + void emitBaseClassDefinition(raw_ostream &OS, const StringRef TargetName, + const std::vector<RegisterBank> &Banks); + void emitBaseClassImplementation(raw_ostream &OS, const StringRef TargetName, + std::vector<RegisterBank> &Banks); + +public: + RegisterBankEmitter(RecordKeeper &R) + : Records(R), RegisterClassHierarchy(Records) {} + + void run(raw_ostream &OS); +}; + +} // end anonymous namespace + +/// Emit code to declare the ID enumeration and external global instance +/// variables. +void RegisterBankEmitter::emitHeader(raw_ostream &OS, + const StringRef TargetName, + const std::vector<RegisterBank> &Banks) { + // <Target>RegisterBankInfo.h + OS << "namespace llvm {\n" + << "namespace " << TargetName << " {\n" + << "enum {\n"; + for (const auto &Bank : Banks) + OS << " " << Bank.getEnumeratorName() << ",\n"; + OS << " NumRegisterBanks,\n" + << "};\n" + << "} // end namespace " << TargetName << "\n" + << "} // end namespace llvm\n"; +} + +/// Emit declarations of the <Target>GenRegisterBankInfo class. 
+void RegisterBankEmitter::emitBaseClassDefinition( + raw_ostream &OS, const StringRef TargetName, + const std::vector<RegisterBank> &Banks) { + OS << "private:\n" + << " static RegisterBank *RegBanks[];\n\n" + << "protected:\n" + << " " << TargetName << "GenRegisterBankInfo();\n" + << "\n"; +} + +/// Visit each register class belonging to the given register bank. +/// +/// A class belongs to the bank iff any of these apply: +/// * It is explicitly specified +/// * It is a subclass of a class that is a member. +/// * It is a class containing subregisters of the registers of a class that +/// is a member. This is known as a subreg-class. +/// +/// This function must be called for each explicitly specified register class. +/// +/// \param RC The register class to search. +/// \param Kind A debug string containing the path the visitor took to reach RC. +/// \param VisitFn The action to take for each class visited. It may be called +/// multiple times for a given class if there are multiple paths +/// to the class. +static void visitRegisterBankClasses( + CodeGenRegBank &RegisterClassHierarchy, const CodeGenRegisterClass *RC, + const Twine Kind, + std::function<void(const CodeGenRegisterClass *, StringRef)> VisitFn, + SmallPtrSetImpl<const CodeGenRegisterClass *> &VisitedRCs) { + + // Make sure we only visit each class once to avoid infinite loops. + if (VisitedRCs.count(RC)) + return; + VisitedRCs.insert(RC); + + // Visit each explicitly named class. + VisitFn(RC, Kind.str()); + + for (const auto &PossibleSubclass : RegisterClassHierarchy.getRegClasses()) { + std::string TmpKind = + (Twine(Kind) + " (" + PossibleSubclass.getName() + ")").str(); + + // Visit each subclass of an explicitly named class. + if (RC != &PossibleSubclass && RC->hasSubClass(&PossibleSubclass)) + visitRegisterBankClasses(RegisterClassHierarchy, &PossibleSubclass, + TmpKind + " " + RC->getName() + " subclass", + VisitFn, VisitedRCs); + + // Visit each class that contains only subregisters of RC with a common + // subregister-index. 
+ // + // More precisely, PossibleSubclass is a subreg-class iff Reg:SubIdx is in + // PossibleSubclass for all registers Reg from RC using any + // subregister-index SubReg + for (const auto &SubIdx : RegisterClassHierarchy.getSubRegIndices()) { + BitVector BV(RegisterClassHierarchy.getRegClasses().size()); + PossibleSubclass.getSuperRegClasses(&SubIdx, BV); + if (BV.test(RC->EnumValue)) { + std::string TmpKind2 = (Twine(TmpKind) + " " + RC->getName() + + " class-with-subregs: " + RC->getName()) + .str(); + VisitFn(&PossibleSubclass, TmpKind2); + } + } + } +} + +void RegisterBankEmitter::emitBaseClassImplementation( + raw_ostream &OS, StringRef TargetName, + std::vector<RegisterBank> &Banks) { + + OS << "namespace llvm {\n" + << "namespace " << TargetName << " {\n"; + for (const auto &Bank : Banks) { + std::vector<std::vector<const CodeGenRegisterClass *>> RCsGroupedByWord( + (RegisterClassHierarchy.getRegClasses().size() + 31) / 32); + + for (const auto &RC : Bank.register_classes()) + RCsGroupedByWord[RC->EnumValue / 32].push_back(RC); + + OS << "const uint32_t " << Bank.getCoverageArrayName() << "[] = {\n"; + unsigned LowestIdxInWord = 0; + for (const auto &RCs : RCsGroupedByWord) { + OS << " // " << LowestIdxInWord << "-" << (LowestIdxInWord + 31) << "\n"; + for (const auto &RC : RCs) { + std::string QualifiedRegClassID = + (Twine(TargetName) + "::" + RC->getName() + "RegClassID").str(); + OS << " (1u << (" << QualifiedRegClassID << " - " + << LowestIdxInWord << ")) |\n"; + } + OS << " 0,\n"; + LowestIdxInWord += 32; + } + OS << "};\n"; + } + OS << "\n"; + + for (const auto &Bank : Banks) { + std::string QualifiedBankID = + (TargetName + "::" + Bank.getEnumeratorName()).str(); + unsigned Size = Bank.getRCWithLargestRegsSize()->SpillSize; + OS << "RegisterBank " << Bank.getInstanceVarName() << "(/* ID */ " + << QualifiedBankID << ", /* Name */ \"" << Bank.getName() + << "\", /* Size */ " << Size << ", " + << "/* CoveredRegClasses */ " << Bank.getCoverageArrayName() + << ", /* NumRegClasses */ " + << RegisterClassHierarchy.getRegClasses().size() << ");\n"; + } + OS << "} // end namespace " << TargetName << "\n" + << "\n"; + + OS << "RegisterBank *" << TargetName + << "GenRegisterBankInfo::RegBanks[] = {\n"; + for (const auto &Bank : Banks) + OS << " &" << TargetName << "::" << Bank.getInstanceVarName() << ",\n"; + OS << "};\n\n"; + + OS << TargetName << "GenRegisterBankInfo::" << TargetName + << "GenRegisterBankInfo()\n" + << " : RegisterBankInfo(RegBanks, " << TargetName + << "::NumRegisterBanks) {\n" + << " // Assert that RegBank indices match their ID's\n" + << "#ifndef NDEBUG\n" + << " unsigned Index = 0;\n" + << " for (const auto &RB : RegBanks)\n" + << " assert(Index++ == RB->getID() && \"Index != ID\");\n" + << "#endif // NDEBUG\n" + << "}\n" + << "} // end namespace llvm\n"; +} + +void RegisterBankEmitter::run(raw_ostream &OS) { + std::vector<Record*> Targets = Records.getAllDerivedDefinitions("Target"); + if (Targets.size() != 1) + PrintFatalError("ERROR: Too many or too few subclasses of Target defined!"); + StringRef TargetName = Targets[0]->getName(); + + std::vector<RegisterBank> Banks; + for (const auto &V : Records.getAllDerivedDefinitions("RegisterBank")) { + SmallPtrSet<const CodeGenRegisterClass *, 8> VisitedRCs; + RegisterBank Bank(*V); + + for (const CodeGenRegisterClass *RC : + Bank.getExplictlySpecifiedRegisterClasses(RegisterClassHierarchy)) { + visitRegisterBankClasses( + RegisterClassHierarchy, RC, "explicit", + [&Bank](const CodeGenRegisterClass *RC, StringRef 
Kind) { + DEBUG(dbgs() << "Added " << RC->getName() << "(" << Kind << ")\n"); + Bank.addRegisterClass(RC); + }, VisitedRCs); + } + + Banks.push_back(Bank); + } + + emitSourceFileHeader("Register Bank Source Fragments", OS); + OS << "#ifdef GET_REGBANK_DECLARATIONS\n" + << "#undef GET_REGBANK_DECLARATIONS\n"; + emitHeader(OS, TargetName, Banks); + OS << "#endif // GET_REGBANK_DECLARATIONS\n\n" + << "#ifdef GET_TARGET_REGBANK_CLASS\n" + << "#undef GET_TARGET_REGBANK_CLASS\n"; + emitBaseClassDefinition(OS, TargetName, Banks); + OS << "#endif // GET_TARGET_REGBANK_CLASS\n\n" + << "#ifdef GET_TARGET_REGBANK_IMPL\n" + << "#undef GET_TARGET_REGBANK_IMPL\n"; + emitBaseClassImplementation(OS, TargetName, Banks); + OS << "#endif // GET_TARGET_REGBANK_IMPL\n"; +} + +namespace llvm { + +void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS) { + RegisterBankEmitter(RK).run(OS); +} + +} // end namespace llvm diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index b75be13c0480e..5b56578a64b3b 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -1023,18 +1023,14 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, << "MCRegisterClasses[] = {\n"; for (const auto &RC : RegisterClasses) { - // Asserts to make sure values will fit in table assuming types from - // MCRegisterInfo.h - assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large."); - assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large."); - assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large."); - + assert(isInt<8>(RC.CopyCost) && "Copy cost too large."); + // Register size and spill size will become independent, but are not at + // the moment. For now use SpillSize as the register size. OS << " { " << RC.getName() << ", " << RC.getName() << "Bits, " << RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), " << RC.getQualifiedName() + "RegClassID" << ", " << RC.SpillSize/8 << ", " - << RC.SpillAlignment/8 << ", " << RC.CopyCost << ", " << ( RC.Allocatable ? 
"true" : "false" ) << " },\n"; } @@ -1316,9 +1312,13 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, << " { // Register class instances\n"; for (const auto &RC : RegisterClasses) { + assert(isUInt<16>(RC.SpillSize/8) && "SpillSize too large."); + assert(isUInt<16>(RC.SpillAlignment/8) && "SpillAlignment too large."); OS << " extern const TargetRegisterClass " << RC.getName() << "RegClass = {\n " << '&' << Target.getName() << "MCRegisterClasses[" << RC.getName() << "RegClassID],\n " + << RC.SpillSize/8 << ", /* SpillSize */\n " + << RC.SpillAlignment/8 << ", /* SpillAlignment */\n " << "VTLists + " << VTSeqs.get(RC.VTs) << ",\n " << RC.getName() << "SubClassMask,\n SuperRegIdxSeqs + " << SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n "; diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index bf7b392b15e58..30516ef5d10de 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -917,6 +917,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, SCDesc.NumMicroOps += WriteRes->getValueAsInt("NumMicroOps"); SCDesc.BeginGroup |= WriteRes->getValueAsBit("BeginGroup"); SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup"); + SCDesc.BeginGroup |= WriteRes->getValueAsBit("SingleIssue"); + SCDesc.EndGroup |= WriteRes->getValueAsBit("SingleIssue"); // Create an entry for each ProcResource listed in WriteRes. RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources"); diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp index 7db8813050fe6..72a556182b1dc 100644 --- a/utils/TableGen/SubtargetFeatureInfo.cpp +++ b/utils/TableGen/SubtargetFeatureInfo.cpp @@ -16,10 +16,11 @@ using namespace llvm; -void SubtargetFeatureInfo::dump() const { - errs() << getEnumName() << " " << Index << "\n"; - TheDef->dump(); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SubtargetFeatureInfo::dump() const { + errs() << getEnumName() << " " << Index << "\n" << *TheDef; } +#endif std::vector<std::pair<Record *, SubtargetFeatureInfo>> SubtargetFeatureInfo::getAll(const RecordKeeper &Records) { @@ -61,11 +62,24 @@ void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration( void SubtargetFeatureInfo::emitNameTable( std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures, raw_ostream &OS) { + // Need to sort the name table so that lookup by the log of the enum value + // gives the proper name. More specifically, for a feature of value 1<<n, + // SubtargetFeatureNames[n] should be the name of the feature. + uint64_t IndexUB = 0; + for (const auto &SF : SubtargetFeatures) + if (IndexUB <= SF.second.Index) + IndexUB = SF.second.Index+1; + + std::vector<std::string> Names; + if (IndexUB > 0) + Names.resize(IndexUB); + for (const auto &SF : SubtargetFeatures) + Names[SF.second.Index] = SF.second.getEnumName(); + OS << "static const char *SubtargetFeatureNames[] = {\n"; - for (const auto &SF : SubtargetFeatures) { - const SubtargetFeatureInfo &SFI = SF.second; - OS << " \"" << SFI.getEnumName() << "\",\n"; - } + for (uint64_t I = 0; I < IndexUB; ++I) + OS << " \"" << Names[I] << "\",\n"; + // A small number of targets have no predicates. Null terminate the array to // avoid a zero-length array. 
OS << " nullptr\n" diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 79a773161e4b4..00d20f1df6c27 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -46,6 +46,8 @@ enum ActionType { GenAttributes, GenSearchableTables, GenGlobalISel, + GenX86EVEX2VEXTables, + GenRegisterBank, }; namespace { @@ -94,11 +96,16 @@ namespace { clEnumValN(GenSearchableTables, "gen-searchable-tables", "Generate generic binary-searchable table"), clEnumValN(GenGlobalISel, "gen-global-isel", - "Generate GlobalISel selector"))); + "Generate GlobalISel selector"), + clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", + "Generate X86 EVEX to VEX compress tables"), + clEnumValN(GenRegisterBank, "gen-register-bank", + "Generate registers bank descriptions"))); + cl::OptionCategory PrintEnumsCat("Options for -print-enums"); cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"), - cl::value_desc("class name")); + cl::value_desc("class name"), cl::cat(PrintEnumsCat)); bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { switch (Action) { @@ -183,6 +190,12 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenGlobalISel: EmitGlobalISel(Records, OS); break; + case GenRegisterBank: + EmitRegisterBank(Records, OS); + break; + case GenX86EVEX2VEXTables: + EmitX86EVEX2VEXTables(Records, OS); + break; } return false; diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index eb306d28180c5..2512997e27f93 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -81,6 +81,8 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS); void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); +void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); +void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 5b710e4461507..c9e36f96736ad 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -879,6 +879,10 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, newInfo.name == "XCHG64ar")) continue; // special case for XCHG*ar and NOOP + if (previousInfo.name == "DATA16_PREFIX" && + newInfo.name == "DATA32_PREFIX") + continue; // special case for data16 and data32 + if (outranks(previousInfo.insnContext, newInfo.insnContext)) continue; diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp new file mode 100644 index 0000000000000..07b96b03b01cc --- /dev/null +++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -0,0 +1,339 @@ +//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This tablegen backend is responsible for emitting the X86 backend EVEX2VEX +/// compression tables. 
+/// +//===----------------------------------------------------------------------===// + +#include "CodeGenDAGPatterns.h" +#include "CodeGenTarget.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/TableGenBackend.h" + +using namespace llvm; + +namespace { + +class X86EVEX2VEXTablesEmitter { + CodeGenTarget Target; + + // Hold all non-masked & non-broadcasted EVEX encoded instructions + std::vector<const CodeGenInstruction *> EVEXInsts; + // Hold all VEX encoded instructions. Divided into groups with same opcodes + // to make the search more efficient + std::map<uint64_t, std::vector<const CodeGenInstruction *>> VEXInsts; + + typedef std::pair<const CodeGenInstruction *, const CodeGenInstruction *> Entry; + + // Represent both compress tables + std::vector<Entry> EVEX2VEX128; + std::vector<Entry> EVEX2VEX256; + + // Represents a manually added entry to the tables + struct ManualEntry { + const char *EVEXInstStr; + const char *VEXInstStr; + bool Is128Bit; + }; + +public: + X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Target(R) {} + + // run - Output X86 EVEX2VEX tables. + void run(raw_ostream &OS); + +private: + // Prints the given table as a C++ array of type + // X86EvexToVexCompressTableEntry + void printTable(const std::vector<Entry> &Table, raw_ostream &OS); + + bool inExceptionList(const CodeGenInstruction *Inst) { + // List of EVEX instructions that match VEX instructions by the encoding + // but do not perform the same operation. + static constexpr const char *ExceptionList[] = { + "VCVTQQ2PD", + "VCVTQQ2PS", + "VPMAXSQ", + "VPMAXUQ", + "VPMINSQ", + "VPMINUQ", + "VPMULLQ", + "VPSRAQ", + "VDBPSADBW", + "VRNDSCALE", + "VSCALEFPS" + }; + // Instruction's name starts with one of the entries in the exception list + for (StringRef InstStr : ExceptionList) { + if (Inst->TheDef->getName().startswith(InstStr)) + return true; + } + return false; + } + +}; + +void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table, + raw_ostream &OS) { + std::string Size = (Table == EVEX2VEX128) ? "128" : "256"; + + OS << "// X86 EVEX encoded instructions that have a VEX " << Size + << " encoding\n" + << "// (table format: <EVEX opcode, VEX-" << Size << " opcode>).\n" + << "static const X86EvexToVexCompressTableEntry X86EvexToVex" << Size + << "CompressTable[] = {\n" + << " // EVEX scalar with corresponding VEX.\n"; + + // Print all entries added to the table + for (auto Pair : Table) { + OS << " { X86::" << Pair.first->TheDef->getName() + << ", X86::" << Pair.second->TheDef->getName() << " },\n"; + } + + // Some VEX instructions were duplicated to multiple EVEX versions due the + // introduction of mask variants, and thus some of the EVEX versions have + // different encoding than the VEX instruction. In order to maximize the + // compression we add these entries manually. 
+ static constexpr ManualEntry ManuallyAddedEntries[] = { + // EVEX-Inst VEX-Inst Is128-bit + {"VMOVDQU8Z128mr", "VMOVDQUmr", true}, + {"VMOVDQU8Z128rm", "VMOVDQUrm", true}, + {"VMOVDQU8Z128rr", "VMOVDQUrr", true}, + {"VMOVDQU8Z128rr_REV", "VMOVDQUrr_REV", true}, + {"VMOVDQU16Z128mr", "VMOVDQUmr", true}, + {"VMOVDQU16Z128rm", "VMOVDQUrm", true}, + {"VMOVDQU16Z128rr", "VMOVDQUrr", true}, + {"VMOVDQU16Z128rr_REV", "VMOVDQUrr_REV", true}, + {"VMOVDQU8Z256mr", "VMOVDQUYmr", false}, + {"VMOVDQU8Z256rm", "VMOVDQUYrm", false}, + {"VMOVDQU8Z256rr", "VMOVDQUYrr", false}, + {"VMOVDQU8Z256rr_REV", "VMOVDQUYrr_REV", false}, + {"VMOVDQU16Z256mr", "VMOVDQUYmr", false}, + {"VMOVDQU16Z256rm", "VMOVDQUYrm", false}, + {"VMOVDQU16Z256rr", "VMOVDQUYrr", false}, + {"VMOVDQU16Z256rr_REV", "VMOVDQUYrr_REV", false}, + + {"VPERMILPDZ128mi", "VPERMILPDmi", true}, + {"VPERMILPDZ128ri", "VPERMILPDri", true}, + {"VPERMILPDZ128rm", "VPERMILPDrm", true}, + {"VPERMILPDZ128rr", "VPERMILPDrr", true}, + {"VPERMILPDZ256mi", "VPERMILPDYmi", false}, + {"VPERMILPDZ256ri", "VPERMILPDYri", false}, + {"VPERMILPDZ256rm", "VPERMILPDYrm", false}, + {"VPERMILPDZ256rr", "VPERMILPDYrr", false}, + + {"VPBROADCASTQZ128m", "VPBROADCASTQrm", true}, + {"VPBROADCASTQZ128r", "VPBROADCASTQrr", true}, + {"VPBROADCASTQZ256m", "VPBROADCASTQYrm", false}, + {"VPBROADCASTQZ256r", "VPBROADCASTQYrr", false}, + + {"VBROADCASTSDZ256m", "VBROADCASTSDYrm", false}, + {"VBROADCASTSDZ256r", "VBROADCASTSDYrr", false}, + + {"VEXTRACTF64x2Z256mr", "VEXTRACTF128mr", false}, + {"VEXTRACTF64x2Z256rr", "VEXTRACTF128rr", false}, + {"VEXTRACTI64x2Z256mr", "VEXTRACTI128mr", false}, + {"VEXTRACTI64x2Z256rr", "VEXTRACTI128rr", false}, + + {"VINSERTF64x2Z256rm", "VINSERTF128rm", false}, + {"VINSERTF64x2Z256rr", "VINSERTF128rr", false}, + {"VINSERTI64x2Z256rm", "VINSERTI128rm", false}, + {"VINSERTI64x2Z256rr", "VINSERTI128rr", false} + }; + + // Print the manually added entries + for (const ManualEntry &Entry : ManuallyAddedEntries) { + if ((Table == EVEX2VEX128 && Entry.Is128Bit) || + (Table == EVEX2VEX256 && !Entry.Is128Bit)) { + OS << " { X86::" << Entry.EVEXInstStr << ", X86::" << Entry.VEXInstStr + << " },\n"; + } + } + + OS << "};\n\n"; +} + +// Return true if the 2 BitsInits are equal +static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) { + if (B1->getNumBits() != B2->getNumBits()) + PrintFatalError("Comparing two BitsInits with different sizes!"); + + for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) { + if (BitInit *Bit1 = dyn_cast<BitInit>(B1->getBit(i))) { + if (BitInit *Bit2 = dyn_cast<BitInit>(B2->getBit(i))) { + if (Bit1->getValue() != Bit2->getValue()) + return false; + } else + PrintFatalError("Invalid BitsInit bit"); + } else + PrintFatalError("Invalid BitsInit bit"); + } + return true; +} + +// Calculates the integer value residing BitsInit object +static inline uint64_t getValueFromBitsInit(const BitsInit *B) { + uint64_t Value = 0; + for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) { + if (BitInit *Bit = dyn_cast<BitInit>(B->getBit(i))) + Value |= uint64_t(Bit->getValue()) << i; + else + PrintFatalError("Invalid VectSize bit"); + } + return Value; +} + +// Function object - Operator() returns true if the given VEX instruction +// matches the EVEX instruction of this object. 
+class IsMatch { + const CodeGenInstruction *Inst; + +public: + IsMatch(const CodeGenInstruction *Inst) : Inst(Inst) {} + + bool operator()(const CodeGenInstruction *Inst2) { + Record *Rec1 = Inst->TheDef; + Record *Rec2 = Inst2->TheDef; + uint64_t Rec1WVEX = + getValueFromBitsInit(Rec1->getValueAsBitsInit("VEX_WPrefix")); + uint64_t Rec2WVEX = + getValueFromBitsInit(Rec2->getValueAsBitsInit("VEX_WPrefix")); + + if (Rec2->getValueAsDef("OpEnc")->getName().str() != "EncVEX" || + // VEX/EVEX fields + Rec2->getValueAsDef("OpPrefix") != Rec1->getValueAsDef("OpPrefix") || + Rec2->getValueAsDef("OpMap") != Rec1->getValueAsDef("OpMap") || + Rec2->getValueAsBit("hasVEX_4V") != Rec1->getValueAsBit("hasVEX_4V") || + !equalBitsInits(Rec2->getValueAsBitsInit("EVEX_LL"), + Rec1->getValueAsBitsInit("EVEX_LL")) || + (Rec1WVEX != 2 && Rec2WVEX != 2 && Rec1WVEX != Rec2WVEX) || + // Instruction's format + Rec2->getValueAsDef("Form") != Rec1->getValueAsDef("Form") || + Rec2->getValueAsBit("isAsmParserOnly") != + Rec1->getValueAsBit("isAsmParserOnly")) + return false; + + // This is needed for instructions with intrinsic version (_Int). + // Where the only difference is the size of the operands. + // For example: VUCOMISDZrm and Int_VUCOMISDrm + // Also for instructions that their EVEX version was upgraded to work with + // k-registers. For example VPCMPEQBrm (xmm output register) and + // VPCMPEQBZ128rm (k register output register). + for (unsigned i = 0; i < Inst->Operands.size(); i++) { + Record *OpRec1 = Inst->Operands[i].Rec; + Record *OpRec2 = Inst2->Operands[i].Rec; + + if (OpRec1 == OpRec2) + continue; + + if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) { + if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2)) + return false; + } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) { + return false; + } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) { + if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) + return false; + } else + return false; + } + + return true; + } + +private: + static inline bool isRegisterOperand(const Record *Rec) { + return Rec->isSubClassOf("RegisterClass") || + Rec->isSubClassOf("RegisterOperand"); + } + + static inline bool isMemoryOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; + } + + static inline bool isImmediateOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; + } + + static inline unsigned int getRegOperandSize(const Record *RegRec) { + if (RegRec->isSubClassOf("RegisterClass")) + return RegRec->getValueAsInt("Alignment"); + if (RegRec->isSubClassOf("RegisterOperand")) + return RegRec->getValueAsDef("RegClass")->getValueAsInt("Alignment"); + + llvm_unreachable("Register operand's size not known!"); + } +}; + +void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) { + emitSourceFileHeader("X86 EVEX2VEX tables", OS); + + ArrayRef<const CodeGenInstruction *> NumberedInstructions = + Target.getInstructionsByEnumValue(); + + for (const CodeGenInstruction *Inst : NumberedInstructions) { + // Filter non-X86 instructions. + if (!Inst->TheDef->isSubClassOf("X86Inst")) + continue; + + // Add VEX encoded instructions to one of VEXInsts vectors according to + // it's opcode. 
+ if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncVEX") { + uint64_t Opcode = getValueFromBitsInit(Inst->TheDef-> + getValueAsBitsInit("Opcode")); + VEXInsts[Opcode].push_back(Inst); + } + // Add relevant EVEX encoded instructions to EVEXInsts + else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncEVEX" && + !Inst->TheDef->getValueAsBit("hasEVEX_K") && + !Inst->TheDef->getValueAsBit("hasEVEX_B") && + getValueFromBitsInit(Inst->TheDef-> + getValueAsBitsInit("EVEX_LL")) != 2 && + !inExceptionList(Inst)) + EVEXInsts.push_back(Inst); + } + + for (const CodeGenInstruction *EVEXInst : EVEXInsts) { + uint64_t Opcode = getValueFromBitsInit(EVEXInst->TheDef-> + getValueAsBitsInit("Opcode")); + // For each EVEX instruction look for a VEX match in the appropriate vector + // (instructions with the same opcode) using function object IsMatch. + auto Match = llvm::find_if(VEXInsts[Opcode], IsMatch(EVEXInst)); + if (Match != VEXInsts[Opcode].end()) { + const CodeGenInstruction *VEXInst = *Match; + + // In case a match is found add new entry to the appropriate table + switch (getValueFromBitsInit( + EVEXInst->TheDef->getValueAsBitsInit("EVEX_LL"))) { + case 0: + EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0} + break; + case 1: + EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1} + break; + default: + llvm_unreachable("Instruction's size not fit for the mapping!"); + } + } + } + + // Print both tables + printTable(EVEX2VEX128, OS); + printTable(EVEX2VEX256, OS); +} +} + +namespace llvm { +void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS) { + X86EVEX2VEXTablesEmitter(RK).run(OS); +} +} diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 4736c4e510d12..e703bbfc4496f 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -138,6 +138,10 @@ namespace X86Local { enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 }; + + enum { + VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2 + }; } using namespace X86Disassembler; @@ -203,7 +207,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, AdSize = byteFromRec(Rec, "AdSizeBits"); HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); HasVEX_4V = Rec->getValueAsBit("hasVEX_4V"); - HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix"); + VEX_WPrefix = byteFromRec(Rec,"VEX_WPrefix"); IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2"); HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); @@ -280,7 +284,7 @@ InstructionContext RecognizableInstr::insnContext() const { llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } // VEX_L & VEX_W - if (HasVEX_LPrefix && HasVEX_WPrefix) { + if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -308,7 +312,7 @@ InstructionContext RecognizableInstr::insnContext() const { llvm_unreachable("Invalid prefix"); } } - else if (HasEVEX_L2Prefix && HasVEX_WPrefix) { + else if (HasEVEX_L2Prefix && VEX_WPrefix == X86Local::VEX_W1) { // EVEX_L2 & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE); @@ -337,7 +341,7 @@ InstructionContext RecognizableInstr::insnContext() const { llvm_unreachable("Invalid prefix"); } } - else if (HasVEX_WPrefix) { + else if (VEX_WPrefix == X86Local::VEX_W1) { // VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_W_OPSIZE); @@ -363,7 +367,7 @@ 
InstructionContext RecognizableInstr::insnContext() const { insnContext = EVEX_KB(IC_EVEX); /// eof EVEX } else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) { - if (HasVEX_LPrefix && HasVEX_WPrefix) { + if (HasVEX_LPrefix && VEX_WPrefix == X86Local::VEX_W1) { if (OpPrefix == X86Local::PD) insnContext = IC_VEX_L_W_OPSIZE; else if (OpPrefix == X86Local::XS) @@ -378,7 +382,7 @@ InstructionContext RecognizableInstr::insnContext() const { } } else if (OpPrefix == X86Local::PD && HasVEX_LPrefix) insnContext = IC_VEX_L_OPSIZE; - else if (OpPrefix == X86Local::PD && HasVEX_WPrefix) + else if (OpPrefix == X86Local::PD && VEX_WPrefix == X86Local::VEX_W1) insnContext = IC_VEX_W_OPSIZE; else if (OpPrefix == X86Local::PD) insnContext = IC_VEX_OPSIZE; @@ -386,11 +390,11 @@ InstructionContext RecognizableInstr::insnContext() const { insnContext = IC_VEX_L_XS; else if (HasVEX_LPrefix && OpPrefix == X86Local::XD) insnContext = IC_VEX_L_XD; - else if (HasVEX_WPrefix && OpPrefix == X86Local::XS) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XS) insnContext = IC_VEX_W_XS; - else if (HasVEX_WPrefix && OpPrefix == X86Local::XD) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::XD) insnContext = IC_VEX_W_XD; - else if (HasVEX_WPrefix && OpPrefix == X86Local::PS) + else if (VEX_WPrefix == X86Local::VEX_W1 && OpPrefix == X86Local::PS) insnContext = IC_VEX_W; else if (HasVEX_LPrefix && OpPrefix == X86Local::PS) insnContext = IC_VEX_L; @@ -457,10 +461,12 @@ void RecognizableInstr::adjustOperandEncoding(OperandEncoding &encoding) { // The scaling factor for AVX512 compressed displacement encoding is an // instruction attribute. Adjust the ModRM encoding type to include the // scale for compressed displacement. - if (encoding != ENCODING_RM || CD8_Scale == 0) + if ((encoding != ENCODING_RM && encoding != ENCODING_VSIB) ||CD8_Scale == 0) return; encoding = (OperandEncoding)(encoding + Log2_32(CD8_Scale)); - assert(encoding <= ENCODING_RM_CD64 && "Invalid CDisp scaling"); + assert(((encoding >= ENCODING_RM && encoding <= ENCODING_RM_CD64) || + (encoding >= ENCODING_VSIB && encoding <= ENCODING_VSIB_CD64)) && + "Invalid CDisp scaling"); } void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, @@ -944,121 +950,121 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, // For OpSize16 instructions, a declared 16-bit register or // immediate encoding is special. TYPE("GR16", TYPE_Rv) - TYPE("i16imm", TYPE_IMMv) } else if(OpSize == X86Local::OpSize32) { // For OpSize32 instructions, a declared 32-bit register or // immediate encoding is special. 
TYPE("GR32", TYPE_Rv) } - TYPE("i16mem", TYPE_Mv) - TYPE("i16imm", TYPE_IMM16) - TYPE("i16i8imm", TYPE_IMMv) + TYPE("i16mem", TYPE_M) + TYPE("i16imm", TYPE_IMM) + TYPE("i16i8imm", TYPE_IMM) TYPE("GR16", TYPE_R16) - TYPE("i32mem", TYPE_Mv) - TYPE("i32imm", TYPE_IMMv) - TYPE("i32i8imm", TYPE_IMM32) + TYPE("i32mem", TYPE_M) + TYPE("i32imm", TYPE_IMM) + TYPE("i32i8imm", TYPE_IMM) TYPE("GR32", TYPE_R32) TYPE("GR32orGR64", TYPE_R32) - TYPE("i64mem", TYPE_Mv) - TYPE("i64i32imm", TYPE_IMM64) - TYPE("i64i8imm", TYPE_IMM64) + TYPE("i64mem", TYPE_M) + TYPE("i64i32imm", TYPE_IMM) + TYPE("i64i8imm", TYPE_IMM) TYPE("GR64", TYPE_R64) - TYPE("i8mem", TYPE_M8) - TYPE("i8imm", TYPE_IMM8) + TYPE("i8mem", TYPE_M) + TYPE("i8imm", TYPE_IMM) TYPE("u8imm", TYPE_UIMM8) TYPE("i32u8imm", TYPE_UIMM8) TYPE("GR8", TYPE_R8) - TYPE("VR128", TYPE_XMM128) - TYPE("VR128X", TYPE_XMM128) - TYPE("f128mem", TYPE_M128) - TYPE("f256mem", TYPE_M256) - TYPE("f512mem", TYPE_M512) - TYPE("FR128", TYPE_XMM128) - TYPE("FR64", TYPE_XMM64) - TYPE("FR64X", TYPE_XMM64) - TYPE("f64mem", TYPE_M64FP) - TYPE("sdmem", TYPE_M64FP) - TYPE("FR32", TYPE_XMM32) - TYPE("FR32X", TYPE_XMM32) - TYPE("f32mem", TYPE_M32FP) - TYPE("ssmem", TYPE_M32FP) + TYPE("VR128", TYPE_XMM) + TYPE("VR128X", TYPE_XMM) + TYPE("f128mem", TYPE_M) + TYPE("f256mem", TYPE_M) + TYPE("f512mem", TYPE_M) + TYPE("FR128", TYPE_XMM) + TYPE("FR64", TYPE_XMM) + TYPE("FR64X", TYPE_XMM) + TYPE("f64mem", TYPE_M) + TYPE("sdmem", TYPE_M) + TYPE("FR32", TYPE_XMM) + TYPE("FR32X", TYPE_XMM) + TYPE("f32mem", TYPE_M) + TYPE("ssmem", TYPE_M) TYPE("RST", TYPE_ST) - TYPE("i128mem", TYPE_M128) - TYPE("i256mem", TYPE_M256) - TYPE("i512mem", TYPE_M512) - TYPE("i64i32imm_pcrel", TYPE_REL64) - TYPE("i16imm_pcrel", TYPE_REL16) - TYPE("i32imm_pcrel", TYPE_REL32) + TYPE("i128mem", TYPE_M) + TYPE("i256mem", TYPE_M) + TYPE("i512mem", TYPE_M) + TYPE("i64i32imm_pcrel", TYPE_REL) + TYPE("i16imm_pcrel", TYPE_REL) + TYPE("i32imm_pcrel", TYPE_REL) TYPE("SSECC", TYPE_IMM3) TYPE("XOPCC", TYPE_IMM3) TYPE("AVXCC", TYPE_IMM5) TYPE("AVX512ICC", TYPE_AVX512ICC) - TYPE("AVX512RC", TYPE_IMM32) - TYPE("brtarget32", TYPE_RELv) - TYPE("brtarget16", TYPE_RELv) - TYPE("brtarget8", TYPE_REL8) - TYPE("f80mem", TYPE_M80FP) - TYPE("lea64_32mem", TYPE_LEA) - TYPE("lea64mem", TYPE_LEA) + TYPE("AVX512RC", TYPE_IMM) + TYPE("brtarget32", TYPE_REL) + TYPE("brtarget16", TYPE_REL) + TYPE("brtarget8", TYPE_REL) + TYPE("f80mem", TYPE_M) + TYPE("lea64_32mem", TYPE_M) + TYPE("lea64mem", TYPE_M) TYPE("VR64", TYPE_MM64) - TYPE("i64imm", TYPE_IMMv) + TYPE("i64imm", TYPE_IMM) TYPE("anymem", TYPE_M) - TYPE("opaque32mem", TYPE_M1616) - TYPE("opaque48mem", TYPE_M1632) - TYPE("opaque80mem", TYPE_M1664) - TYPE("opaque512mem", TYPE_M512) + TYPE("opaque32mem", TYPE_M) + TYPE("opaque48mem", TYPE_M) + TYPE("opaque80mem", TYPE_M) + TYPE("opaque512mem", TYPE_M) TYPE("SEGMENT_REG", TYPE_SEGMENTREG) TYPE("DEBUG_REG", TYPE_DEBUGREG) TYPE("CONTROL_REG", TYPE_CONTROLREG) - TYPE("srcidx8", TYPE_SRCIDX8) - TYPE("srcidx16", TYPE_SRCIDX16) - TYPE("srcidx32", TYPE_SRCIDX32) - TYPE("srcidx64", TYPE_SRCIDX64) - TYPE("dstidx8", TYPE_DSTIDX8) - TYPE("dstidx16", TYPE_DSTIDX16) - TYPE("dstidx32", TYPE_DSTIDX32) - TYPE("dstidx64", TYPE_DSTIDX64) - TYPE("offset16_8", TYPE_MOFFS8) - TYPE("offset16_16", TYPE_MOFFS16) - TYPE("offset16_32", TYPE_MOFFS32) - TYPE("offset32_8", TYPE_MOFFS8) - TYPE("offset32_16", TYPE_MOFFS16) - TYPE("offset32_32", TYPE_MOFFS32) - TYPE("offset32_64", TYPE_MOFFS64) - TYPE("offset64_8", TYPE_MOFFS8) - TYPE("offset64_16", TYPE_MOFFS16) - 
TYPE("offset64_32", TYPE_MOFFS32) - TYPE("offset64_64", TYPE_MOFFS64) - TYPE("VR256", TYPE_XMM256) - TYPE("VR256X", TYPE_XMM256) - TYPE("VR512", TYPE_XMM512) - TYPE("VK1", TYPE_VK1) - TYPE("VK1WM", TYPE_VK1) - TYPE("VK2", TYPE_VK2) - TYPE("VK2WM", TYPE_VK2) - TYPE("VK4", TYPE_VK4) - TYPE("VK4WM", TYPE_VK4) - TYPE("VK8", TYPE_VK8) - TYPE("VK8WM", TYPE_VK8) - TYPE("VK16", TYPE_VK16) - TYPE("VK16WM", TYPE_VK16) - TYPE("VK32", TYPE_VK32) - TYPE("VK32WM", TYPE_VK32) - TYPE("VK64", TYPE_VK64) - TYPE("VK64WM", TYPE_VK64) + TYPE("srcidx8", TYPE_SRCIDX) + TYPE("srcidx16", TYPE_SRCIDX) + TYPE("srcidx32", TYPE_SRCIDX) + TYPE("srcidx64", TYPE_SRCIDX) + TYPE("dstidx8", TYPE_DSTIDX) + TYPE("dstidx16", TYPE_DSTIDX) + TYPE("dstidx32", TYPE_DSTIDX) + TYPE("dstidx64", TYPE_DSTIDX) + TYPE("offset16_8", TYPE_MOFFS) + TYPE("offset16_16", TYPE_MOFFS) + TYPE("offset16_32", TYPE_MOFFS) + TYPE("offset32_8", TYPE_MOFFS) + TYPE("offset32_16", TYPE_MOFFS) + TYPE("offset32_32", TYPE_MOFFS) + TYPE("offset32_64", TYPE_MOFFS) + TYPE("offset64_8", TYPE_MOFFS) + TYPE("offset64_16", TYPE_MOFFS) + TYPE("offset64_32", TYPE_MOFFS) + TYPE("offset64_64", TYPE_MOFFS) + TYPE("VR256", TYPE_YMM) + TYPE("VR256X", TYPE_YMM) + TYPE("VR512", TYPE_ZMM) + TYPE("VK1", TYPE_VK) + TYPE("VK1WM", TYPE_VK) + TYPE("VK2", TYPE_VK) + TYPE("VK2WM", TYPE_VK) + TYPE("VK4", TYPE_VK) + TYPE("VK4WM", TYPE_VK) + TYPE("VK8", TYPE_VK) + TYPE("VK8WM", TYPE_VK) + TYPE("VK16", TYPE_VK) + TYPE("VK16WM", TYPE_VK) + TYPE("VK32", TYPE_VK) + TYPE("VK32WM", TYPE_VK) + TYPE("VK64", TYPE_VK) + TYPE("VK64WM", TYPE_VK) TYPE("GR32_NOAX", TYPE_Rv) - TYPE("vx64mem", TYPE_M64) - TYPE("vx128mem", TYPE_M128) - TYPE("vx256mem", TYPE_M256) - TYPE("vy128mem", TYPE_M128) - TYPE("vy256mem", TYPE_M256) - TYPE("vx64xmem", TYPE_M64) - TYPE("vx128xmem", TYPE_M128) - TYPE("vx256xmem", TYPE_M256) - TYPE("vy128xmem", TYPE_M128) - TYPE("vy256xmem", TYPE_M256) - TYPE("vy512mem", TYPE_M512) - TYPE("vz512mem", TYPE_M512) + TYPE("vx64mem", TYPE_M) + TYPE("vx128mem", TYPE_M) + TYPE("vx256mem", TYPE_M) + TYPE("vy128mem", TYPE_M) + TYPE("vy256mem", TYPE_M) + TYPE("vx64xmem", TYPE_M) + TYPE("vx128xmem", TYPE_M) + TYPE("vx256xmem", TYPE_M) + TYPE("vy128xmem", TYPE_M) + TYPE("vy256xmem", TYPE_M) + TYPE("vy512mem", TYPE_M) + TYPE("vz256xmem", TYPE_M) + TYPE("vz512mem", TYPE_M) TYPE("BNDR", TYPE_BNDR) errs() << "Unhandled type string " << s << "\n"; llvm_unreachable("Unhandled type string"); @@ -1242,18 +1248,19 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s, ENCODING("opaque48mem", ENCODING_RM) ENCODING("opaque80mem", ENCODING_RM) ENCODING("opaque512mem", ENCODING_RM) - ENCODING("vx64mem", ENCODING_RM) - ENCODING("vx128mem", ENCODING_RM) - ENCODING("vx256mem", ENCODING_RM) - ENCODING("vy128mem", ENCODING_RM) - ENCODING("vy256mem", ENCODING_RM) - ENCODING("vx64xmem", ENCODING_RM) - ENCODING("vx128xmem", ENCODING_RM) - ENCODING("vx256xmem", ENCODING_RM) - ENCODING("vy128xmem", ENCODING_RM) - ENCODING("vy256xmem", ENCODING_RM) - ENCODING("vy512mem", ENCODING_RM) - ENCODING("vz512mem", ENCODING_RM) + ENCODING("vx64mem", ENCODING_VSIB) + ENCODING("vx128mem", ENCODING_VSIB) + ENCODING("vx256mem", ENCODING_VSIB) + ENCODING("vy128mem", ENCODING_VSIB) + ENCODING("vy256mem", ENCODING_VSIB) + ENCODING("vx64xmem", ENCODING_VSIB) + ENCODING("vx128xmem", ENCODING_VSIB) + ENCODING("vx256xmem", ENCODING_VSIB) + ENCODING("vy128xmem", ENCODING_VSIB) + ENCODING("vy256xmem", ENCODING_VSIB) + ENCODING("vy512mem", ENCODING_VSIB) + ENCODING("vz256xmem", ENCODING_VSIB) + ENCODING("vz512mem", 
ENCODING_VSIB) errs() << "Unhandled memory encoding " << s << "\n"; llvm_unreachable("Unhandled memory encoding"); } diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index 2e611587cc316..91ed928540c36 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -55,8 +55,8 @@ private: bool HasREX_WPrefix; /// The hasVEX_4V field from the record bool HasVEX_4V; - /// The hasVEX_WPrefix field from the record - bool HasVEX_WPrefix; + /// The VEX_WPrefix field from the record + uint8_t VEX_WPrefix; /// Inferred from the operands; indicates whether the L bit in the VEX prefix is set bool HasVEX_LPrefix; /// The ignoreVEX_L field from the record diff --git a/utils/bisect-skip-count b/utils/bisect-skip-count new file mode 100755 index 0000000000000..b18b4f41481b6 --- /dev/null +++ b/utils/bisect-skip-count @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# This script is used to bisect skip and count arguments for --debug-counter. +# It is similar to bisect, except it understands how to increase skip and decrease count +import os +import sys +import argparse +# This is for timeout support. Use the recommended way of import. +# We do timeouts because when doing, execution testing, we have a habit +# of finding variants that infinite loop +if os.name == 'posix' and sys.version_info[0] < 3: + import subprocess32 as subprocess +else: + import subprocess +parser = argparse.ArgumentParser() + +parser.add_argument('--skipstart', type=int, default=0) +parser.add_argument('--skipend', type=int, default=(1 << 32)) +parser.add_argument('--countstart', type=int, default=0) +parser.add_argument('--countend', type=int, default=(1 << 32)) +parser.add_argument('--timeout', type=int, default=None) +# Use shell support if you need to use complex shell expressions in your command +parser.add_argument('--shell', type=bool, default=False) +parser.add_argument('command', nargs='+') + +args = parser.parse_args() + +start = args.skipstart +end = args.skipend + +print("Bisect of Skip starting!") +print("Start: %d" % start) +print("End: %d" % end) + +last = None +while start != end and start != end-1: + count = start + (end - start)/2 + print("Visiting Skip: %d with (Start, End) = (%d,%d)" % (count, start, end)) + cmd = [x % {'skip':count, 'count':-1} for x in args.command] + print cmd + try: + result = subprocess.call(cmd, shell=args.shell, timeout=args.timeout) + if result == 0: + print(" PASSES! Setting end to count") + end = count + else: + print(" FAILS! Setting start to count") + start = count + except: + print(" TIMEOUT, setting end to count") + end = count +firstcount = start +print("Last good skip: %d" % start) +start = args.countstart +end = args.countend +print("Bisect of Count starting!") +print("Start: %d" % start) +print("End: %d" % end) +while start != end and start != end-1: + count = start + (end - start)/2 + print("Visiting Count: %d with (Start, End) = (%d,%d)" % (count, start, end)) + cmd = [x % {'count':count, 'skip':firstcount } for x in args.command] + print cmd + try: + result = subprocess.call(cmd, shell=args.shell, timeout=args.timeout) + if result == 0: + print(" PASSES! Setting start to count") + start = count + else: + print(" FAILS! 
Setting end to count") + end = count + except: + print(" TIMEOUT, setting start to count") + start = count + +print("Last good count: %d" % start) diff --git a/utils/gdb-scripts/prettyprinters.py b/utils/gdb-scripts/prettyprinters.py index 5385e16246b3f..be21b7083f32d 100644 --- a/utils/gdb-scripts/prettyprinters.py +++ b/utils/gdb-scripts/prettyprinters.py @@ -193,6 +193,113 @@ class DenseMapPrinter: def display_hint(self): return 'map' +class TwinePrinter: + "Print a Twine" + + def __init__(self, val): + self._val = val + + def display_hint(self): + return 'string' + + def string_from_pretty_printer_lookup(self, val): + '''Lookup the default pretty-printer for val and use it. + + If no pretty-printer is defined for the type of val, print an error and + return a placeholder string.''' + + pp = gdb.default_visualizer(val) + if pp: + s = pp.to_string() + + # The pretty-printer may return a LazyString instead of an actual Python + # string. Convert it to a Python string. However, GDB doesn't seem to + # register the LazyString type, so we can't check + # "type(s) == gdb.LazyString". + if 'LazyString' in type(s).__name__: + s = s.value().address.string() + + else: + print(('No pretty printer for {} found. The resulting Twine ' + + 'representation will be incomplete.').format(val.type.name)) + s = '(missing {})'.format(val.type.name) + + return s + + def string_from_child(self, child, kind): + '''Return the string representation of the Twine::Child child.''' + + if kind in ('llvm::Twine::EmptyKind', 'llvm::Twine::NullKind'): + return '' + + if kind == 'llvm::Twine::TwineKind': + return self.string_from_twine_object(child['twine'].dereference()) + + if kind == 'llvm::Twine::CStringKind': + return child['cString'].string() + + if kind == 'llvm::Twine::StdStringKind': + val = child['stdString'].dereference() + return self.string_from_pretty_printer_lookup(val) + + if kind == 'llvm::Twine::StringRefKind': + val = child['stringRef'].dereference() + pp = StringRefPrinter(val) + return pp.to_string() + + if kind == 'llvm::Twine::SmallStringKind': + val = child['smallString'].dereference() + pp = SmallStringPrinter(val) + return pp.to_string() + + if kind == 'llvm::Twine::CharKind': + return chr(child['character']) + + if kind == 'llvm::Twine::DecUIKind': + return str(child['decUI']) + + if kind == 'llvm::Twine::DecIKind': + return str(child['decI']) + + if kind == 'llvm::Twine::DecULKind': + return str(child['decUL'].dereference()) + + if kind == 'llvm::Twine::DecLKind': + return str(child['decL'].dereference()) + + if kind == 'llvm::Twine::DecULLKind': + return str(child['decULL'].dereference()) + + if kind == 'llvm::Twine::DecLLKind': + return str(child['decLL'].dereference()) + + if kind == 'llvm::Twine::UHexKind': + val = child['uHex'].dereference() + return hex(int(val)) + + print(('Unhandled NodeKind {} in Twine pretty-printer. 
The result will be ' + 'incomplete.').format(kind)) + + return '(unhandled {})'.format(kind) + + def string_from_twine_object(self, twine): + '''Return the string representation of the Twine object twine.''' + + lhs_str = '' + rhs_str = '' + + lhs = twine['LHS'] + rhs = twine['RHS'] + lhs_kind = str(twine['LHSKind']) + rhs_kind = str(twine['RHSKind']) + + lhs_str = self.string_from_child(lhs, lhs_kind) + rhs_str = self.string_from_child(rhs, rhs_kind) + + return lhs_str + rhs_str + + def to_string(self): + return self.string_from_twine_object(self._val) pp = gdb.printing.RegexpCollectionPrettyPrinter("LLVMSupport") pp.add_printer('llvm::SmallString', '^llvm::SmallString<.*>$', SmallStringPrinter) @@ -201,4 +308,5 @@ pp.add_printer('llvm::SmallVectorImpl', '^llvm::SmallVector(Impl)?<.*>$', SmallV pp.add_printer('llvm::ArrayRef', '^llvm::(Const)?ArrayRef<.*>$', ArrayRefPrinter) pp.add_printer('llvm::Optional', '^llvm::Optional<.*>$', OptionalPrinter) pp.add_printer('llvm::DenseMap', '^llvm::DenseMap<.*>$', DenseMapPrinter) +pp.add_printer('llvm::Twine', '^llvm::Twine$', TwinePrinter) gdb.printing.register_pretty_printer(gdb.current_objfile(), pp) diff --git a/utils/lit/lit/BooleanExpression.py b/utils/lit/lit/BooleanExpression.py new file mode 100644 index 0000000000000..3eb5060de3e30 --- /dev/null +++ b/utils/lit/lit/BooleanExpression.py @@ -0,0 +1,251 @@ +import re + +class BooleanExpression: + # A simple evaluator of boolean expressions. + # + # Grammar: + # expr :: or_expr + # or_expr :: and_expr ('||' and_expr)* + # and_expr :: not_expr ('&&' not_expr)* + # not_expr :: '!' not_expr + # '(' or_expr ')' + # identifier + # identifier :: [-+=._a-zA-Z0-9]+ + + # Evaluates `string` as a boolean expression. + # Returns True or False. Throws a ValueError on syntax error. + # + # Variables in `variables` are true. + # Substrings of `triple` are true. + # 'true' is true. + # All other identifiers are false. + @staticmethod + def evaluate(string, variables, triple=""): + try: + parser = BooleanExpression(string, set(variables), triple) + return parser.parseAll() + except ValueError as e: + raise ValueError(str(e) + ('\nin expression: %r' % string)) + + ##### + + def __init__(self, string, variables, triple=""): + self.tokens = BooleanExpression.tokenize(string) + self.variables = variables + self.variables.add('true') + self.triple = triple + self.value = None + self.token = None + + # Singleton end-of-expression marker. + END = object() + + # Tokenization pattern. + Pattern = re.compile(r'\A\s*([()]|[-+=._a-zA-Z0-9]+|&&|\|\||!)\s*(.*)\Z') + + @staticmethod + def tokenize(string): + while True: + m = re.match(BooleanExpression.Pattern, string) + if m is None: + if string == "": + yield BooleanExpression.END; + return + else: + raise ValueError("couldn't parse text: %r" % string) + + token = m.group(1) + string = m.group(2) + yield token + + def quote(self, token): + if token is BooleanExpression.END: + return '<end of expression>' + else: + return repr(token) + + def accept(self, t): + if self.token == t: + self.token = next(self.tokens) + return True + else: + return False + + def expect(self, t): + if self.token == t: + if self.token != BooleanExpression.END: + self.token = next(self.tokens) + else: + raise ValueError("expected: %s\nhave: %s" % + (self.quote(t), self.quote(self.token))) + + def isIdentifier(self, t): + if (t is BooleanExpression.END or t == '&&' or t == '||' or + t == '!' 
or t == '(' or t == ')'): + return False + return True + + def parseNOT(self): + if self.accept('!'): + self.parseNOT() + self.value = not self.value + elif self.accept('('): + self.parseOR() + self.expect(')') + elif not self.isIdentifier(self.token): + raise ValueError("expected: '!' or '(' or identifier\nhave: %s" % + self.quote(self.token)) + else: + self.value = (self.token in self.variables or + self.token in self.triple) + self.token = next(self.tokens) + + def parseAND(self): + self.parseNOT() + while self.accept('&&'): + left = self.value + self.parseNOT() + right = self.value + # this is technically the wrong associativity, but it + # doesn't matter for this limited expression grammar + self.value = left and right + + def parseOR(self): + self.parseAND() + while self.accept('||'): + left = self.value + self.parseAND() + right = self.value + # this is technically the wrong associativity, but it + # doesn't matter for this limited expression grammar + self.value = left or right + + def parseAll(self): + self.token = next(self.tokens) + self.parseOR() + self.expect(BooleanExpression.END) + return self.value + + +####### +# Tests + +import unittest + +class TestBooleanExpression(unittest.TestCase): + def test_variables(self): + variables = {'its-true', 'false-lol-true', 'under_score', + 'e=quals', 'd1g1ts'} + self.assertTrue(BooleanExpression.evaluate('true', variables)) + self.assertTrue(BooleanExpression.evaluate('its-true', variables)) + self.assertTrue(BooleanExpression.evaluate('false-lol-true', variables)) + self.assertTrue(BooleanExpression.evaluate('under_score', variables)) + self.assertTrue(BooleanExpression.evaluate('e=quals', variables)) + self.assertTrue(BooleanExpression.evaluate('d1g1ts', variables)) + + self.assertFalse(BooleanExpression.evaluate('false', variables)) + self.assertFalse(BooleanExpression.evaluate('True', variables)) + self.assertFalse(BooleanExpression.evaluate('true-ish', variables)) + self.assertFalse(BooleanExpression.evaluate('not_true', variables)) + self.assertFalse(BooleanExpression.evaluate('tru', variables)) + + def test_triple(self): + triple = 'arch-vendor-os' + self.assertTrue(BooleanExpression.evaluate('arch-', {}, triple)) + self.assertTrue(BooleanExpression.evaluate('ar', {}, triple)) + self.assertTrue(BooleanExpression.evaluate('ch-vend', {}, triple)) + self.assertTrue(BooleanExpression.evaluate('-vendor-', {}, triple)) + self.assertTrue(BooleanExpression.evaluate('-os', {}, triple)) + self.assertFalse(BooleanExpression.evaluate('arch-os', {}, triple)) + + def test_operators(self): + self.assertTrue(BooleanExpression.evaluate('true || true', {})) + self.assertTrue(BooleanExpression.evaluate('true || false', {})) + self.assertTrue(BooleanExpression.evaluate('false || true', {})) + self.assertFalse(BooleanExpression.evaluate('false || false', {})) + + self.assertTrue(BooleanExpression.evaluate('true && true', {})) + self.assertFalse(BooleanExpression.evaluate('true && false', {})) + self.assertFalse(BooleanExpression.evaluate('false && true', {})) + self.assertFalse(BooleanExpression.evaluate('false && false', {})) + + self.assertFalse(BooleanExpression.evaluate('!true', {})) + self.assertTrue(BooleanExpression.evaluate('!false', {})) + + self.assertTrue(BooleanExpression.evaluate(' ((!((false) )) ) ', {})) + self.assertTrue(BooleanExpression.evaluate('true && (true && (true))', {})) + self.assertTrue(BooleanExpression.evaluate('!false && !false && !! 
!false', {})) + self.assertTrue(BooleanExpression.evaluate('false && false || true', {})) + self.assertTrue(BooleanExpression.evaluate('(false && false) || true', {})) + self.assertFalse(BooleanExpression.evaluate('false && (false || true)', {})) + + # Evaluate boolean expression `expr`. + # Fail if it does not throw a ValueError containing the text `error`. + def checkException(self, expr, error): + try: + BooleanExpression.evaluate(expr, {}) + self.fail("expression %r didn't cause an exception" % expr) + except ValueError as e: + if -1 == str(e).find(error): + self.fail(("expression %r caused the wrong ValueError\n" + + "actual error was:\n%s\n" + + "expected error was:\n%s\n") % (expr, e, error)) + except BaseException as e: + self.fail(("expression %r caused the wrong exception; actual " + + "exception was: \n%r") % (expr, e)) + + def test_errors(self): + self.checkException("ba#d", + "couldn't parse text: '#d'\n" + + "in expression: 'ba#d'") + + self.checkException("true and true", + "expected: <end of expression>\n" + + "have: 'and'\n" + + "in expression: 'true and true'") + + self.checkException("|| true", + "expected: '!' or '(' or identifier\n" + + "have: '||'\n" + + "in expression: '|| true'") + + self.checkException("true &&", + "expected: '!' or '(' or identifier\n" + + "have: <end of expression>\n" + + "in expression: 'true &&'") + + self.checkException("", + "expected: '!' or '(' or identifier\n" + + "have: <end of expression>\n" + + "in expression: ''") + + self.checkException("*", + "couldn't parse text: '*'\n" + + "in expression: '*'") + + self.checkException("no wait stop", + "expected: <end of expression>\n" + + "have: 'wait'\n" + + "in expression: 'no wait stop'") + + self.checkException("no-$-please", + "couldn't parse text: '$-please'\n" + + "in expression: 'no-$-please'") + + self.checkException("(((true && true) || true)", + "expected: ')'\n" + + "have: <end of expression>\n" + + "in expression: '(((true && true) || true)'") + + self.checkException("true (true)", + "expected: <end of expression>\n" + + "have: '('\n" + + "in expression: 'true (true)'") + + self.checkException("( )", + "expected: '!' or '(' or identifier\n" + + "have: ')'\n" + + "in expression: '( )'") + +if __name__ == '__main__': + unittest.main() diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py index fc50ffc1039d2..2b680846e1767 100644 --- a/utils/lit/lit/LitConfig.py +++ b/utils/lit/lit/LitConfig.py @@ -24,7 +24,8 @@ class LitConfig(object): noExecute, debug, isWindows, params, config_prefix = None, maxIndividualTestTime = 0, - maxFailures = None): + maxFailures = None, + parallelism_groups = []): # The name of the test runner. self.progname = progname # The items to add to the PATH environment variable. 
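The BooleanExpression evaluator added in utils/lit/lit/BooleanExpression.py treats every name in 'variables', every substring of 'triple', and the literal identifier 'true' as true; all other identifiers are false. A minimal usage sketch of the new class (the feature names and triple below are made up for illustration):

    from lit.BooleanExpression import BooleanExpression

    features = {'x86_64', 'asserts'}
    triple = 'x86_64-unknown-linux-gnu'

    # True: 'x86_64' is an available feature and 'windows' is not.
    print(BooleanExpression.evaluate('x86_64 && !windows', features, triple))

    # True: 'linux' is a substring of the target triple.
    print(BooleanExpression.evaluate('linux || some-other-feature', features, triple))

    # A syntax error raises ValueError; the directive parsers below re-raise it
    # with the keyword and line number, and setResult() turns an XFAIL parse
    # error into an UNRESOLVED result.
    try:
        BooleanExpression.evaluate('x86_64 &&', features, triple)
    except ValueError as e:
        print(e)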
@@ -62,6 +63,7 @@ class LitConfig(object): self.maxIndividualTestTime = maxIndividualTestTime self.maxFailures = maxFailures + self.parallelism_groups = parallelism_groups @property def maxIndividualTestTime(self): diff --git a/utils/lit/lit/ShCommands.py b/utils/lit/lit/ShCommands.py index 9ca9e8c91c0d4..01e91c55da989 100644 --- a/utils/lit/lit/ShCommands.py +++ b/utils/lit/lit/ShCommands.py @@ -35,6 +35,29 @@ class Command: else: file.write("%s%s '%s'" % (r[0][1], r[0][0], r[1])) +class GlobItem: + def __init__(self, pattern): + self.pattern = pattern + + def __repr__(self): + return self.pattern + + def __eq__(self, other): + if not isinstance(other, Command): + return False + + return (self.pattern == other.pattern) + + def resolve(self, cwd): + import glob + import os + if os.path.isabs(self.pattern): + abspath = self.pattern + else: + abspath = os.path.join(cwd, self.pattern) + results = glob.glob(abspath) + return [self.pattern] if len(results) == 0 else results + class Pipeline: def __init__(self, commands, negate=False, pipe_err=False): self.commands = commands diff --git a/utils/lit/lit/ShUtil.py b/utils/lit/lit/ShUtil.py index 0b3e0f58c9773..00ec8ab004936 100644 --- a/utils/lit/lit/ShUtil.py +++ b/utils/lit/lit/ShUtil.py @@ -2,7 +2,7 @@ from __future__ import absolute_import import itertools import lit.util -from lit.ShCommands import Command, Pipeline, Seq +from lit.ShCommands import Command, GlobItem, Pipeline, Seq class ShLexer: def __init__(self, data, win32Escapes = False): @@ -40,13 +40,15 @@ class ShLexer: return None self.pos = self.pos - 1 + len(chunk) - return chunk + return GlobItem(chunk) if '*' in chunk or '?' in chunk else chunk def lex_arg_slow(self, c): if c in "'\"": str = self.lex_arg_quoted(c) else: str = c + unquoted_glob_char = False + quoted_glob_char = False while self.pos != self.end: c = self.look() if c.isspace() or c in "|&;": @@ -65,12 +67,12 @@ class ShLexer: tok = self.lex_one_token() assert isinstance(tok, tuple) and len(tok) == 1 return (tok[0], num) - elif c == '"': + elif c == '"' or c == "'": self.eat() - str += self.lex_arg_quoted('"') - elif c == "'": - self.eat() - str += self.lex_arg_quoted("'") + quoted_arg = self.lex_arg_quoted(c) + if '*' in quoted_arg or '?' in quoted_arg: + quoted_glob_char = True + str += quoted_arg elif not self.win32Escapes and c == '\\': # Outside of a string, '\\' escapes everything. self.eat() @@ -79,9 +81,25 @@ class ShLexer: "escape at end of quoted argument in: %r" % self.data) return str str += self.eat() + elif c in '*?': + unquoted_glob_char = True + str += self.eat() else: str += self.eat() - return str + # If a quote character is present, lex_arg_quoted will remove the quotes + # and append the argument directly. This causes a problem when the + # quoted portion contains a glob character, as the character will no + # longer be treated literally. If glob characters occur *only* inside + # of quotes, then we can handle this by not globbing at all, and if + # glob characters occur *only* outside of quotes, we can still glob just + # fine. But if a glob character occurs both inside and outside of + # quotes this presents a problem. In practice this is such an obscure + # edge case that it doesn't seem worth the added complexity to support. + # By adding an assertion, it means some bot somewhere will catch this + # and flag the user of a non-portable test (which could almost certainly + # be re-written to work correctly without triggering this). 
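The GlobItem class added to utils/lit/lit/ShCommands.py carries unquoted wildcard arguments through lit's internal shell; its resolve() method expands the pattern relative to the command's working directory and falls back to the literal pattern when nothing matches. A short sketch of that behaviour (the temporary directory and file names are illustrative):

    import os
    import tempfile

    from lit.ShCommands import GlobItem

    d = tempfile.mkdtemp()
    open(os.path.join(d, 'a.txt'), 'w').close()

    # One match: the relative pattern is resolved against the given cwd.
    print(GlobItem('*.txt').resolve(d))   # [os.path.join(d, 'a.txt')]

    # No match: the literal pattern is passed through unchanged.
    print(GlobItem('*.log').resolve(d))   # ['*.log']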
+ assert not (quoted_glob_char and unquoted_glob_char) + return GlobItem(str) if unquoted_glob_char else str def lex_arg_quoted(self, delim): str = '' @@ -202,7 +220,7 @@ class ShParser: break # If this is an argument, just add it to the current command. - if isinstance(tok, str): + if isinstance(tok, (str, GlobItem)): args.append(self.lex()) continue diff --git a/utils/lit/lit/Test.py b/utils/lit/lit/Test.py index 657a7e8140d7a..1a9e3fe80fb39 100644 --- a/utils/lit/lit/Test.py +++ b/utils/lit/lit/Test.py @@ -2,6 +2,8 @@ import os from xml.sax.saxutils import escape from json import JSONEncoder +from lit.BooleanExpression import BooleanExpression + # Test result codes. class ResultCode(object): @@ -180,10 +182,24 @@ class Test: self.path_in_suite = path_in_suite self.config = config self.file_path = file_path - # A list of conditions under which this test is expected to fail. These - # can optionally be provided by test format handlers, and will be - # honored when the test result is supplied. + + # A list of conditions under which this test is expected to fail. + # Each condition is a boolean expression of features and target + # triple parts. These can optionally be provided by test format + # handlers, and will be honored when the test result is supplied. self.xfails = [] + + # A list of conditions that must be satisfied before running the test. + # Each condition is a boolean expression of features. All of them + # must be True for the test to run. + # FIXME should target triple parts count here too? + self.requires = [] + + # A list of conditions that prevent execution of the test. + # Each condition is a boolean expression of features and target + # triple parts. All of them must be False for the test to run. + self.unsupported = [] + # The test result, once complete. self.result = None @@ -196,11 +212,16 @@ class Test: self.result = result # Apply the XFAIL handling to resolve the result exit code. - if self.isExpectedToFail(): - if self.result.code == PASS: - self.result.code = XPASS - elif self.result.code == FAIL: - self.result.code = XFAIL + try: + if self.isExpectedToFail(): + if self.result.code == PASS: + self.result.code = XPASS + elif self.result.code == FAIL: + self.result.code = XFAIL + except ValueError as e: + # Syntax error in an XFAIL line. + self.result.code = UNRESOLVED + self.result.output = str(e) def getFullName(self): return self.suite.config.name + ' :: ' + '/'.join(self.path_in_suite) @@ -224,24 +245,91 @@ class Test: configuration. This check relies on the test xfails property which by some test formats may not be computed until the test has first been executed. + Throws ValueError if an XFAIL line has a syntax error. """ + features = self.config.available_features + triple = getattr(self.suite.config, 'target_triple', "") + # Check if any of the xfails match an available feature or the target. for item in self.xfails: # If this is the wildcard, it always fails. if item == '*': return True - # If this is an exact match for one of the features, it fails. - if item in self.config.available_features: - return True - - # If this is a part of the target triple, it fails. - if item and item in self.suite.config.target_triple: - return True + # If this is a True expression of features and target triple parts, + # it fails. 
+ try: + if BooleanExpression.evaluate(item, features, triple): + return True + except ValueError as e: + raise ValueError('Error in XFAIL list:\n%s' % str(e)) return False + def isWithinFeatureLimits(self): + """ + isWithinFeatureLimits() -> bool + + A test is within the feature limits set by run_only_tests if + 1. the test's requirements ARE satisfied by the available features + 2. the test's requirements ARE NOT satisfied after the limiting + features are removed from the available features + + Throws ValueError if a REQUIRES line has a syntax error. + """ + + if not self.config.limit_to_features: + return True # No limits. Run it. + + # Check the requirements as-is (#1) + if self.getMissingRequiredFeatures(): + return False + + # Check the requirements after removing the limiting features (#2) + featuresMinusLimits = [f for f in self.config.available_features + if not f in self.config.limit_to_features] + if not self.getMissingRequiredFeaturesFromList(featuresMinusLimits): + return False + + return True + + def getMissingRequiredFeaturesFromList(self, features): + try: + return [item for item in self.requires + if not BooleanExpression.evaluate(item, features)] + except ValueError as e: + raise ValueError('Error in REQUIRES list:\n%s' % str(e)) + + def getMissingRequiredFeatures(self): + """ + getMissingRequiredFeatures() -> list of strings + + Returns a list of features from REQUIRES that are not satisfied." + Throws ValueError if a REQUIRES line has a syntax error. + """ + + features = self.config.available_features + return self.getMissingRequiredFeaturesFromList(features) + + def getUnsupportedFeatures(self): + """ + getUnsupportedFeatures() -> list of strings + + Returns a list of features from UNSUPPORTED that are present + in the test configuration's features or target triple. + Throws ValueError if an UNSUPPORTED line has a syntax error. + """ + + features = self.config.available_features + triple = getattr(self.suite.config, 'target_triple', "") + + try: + return [item for item in self.unsupported + if BooleanExpression.evaluate(item, features, triple)] + except ValueError as e: + raise ValueError('Error in UNSUPPORTED list:\n%s' % str(e)) + def isEarlyTest(self): """ isEarlyTest() -> bool diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index 942ae38a04c5d..3fb9def26ee88 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -5,10 +5,12 @@ import platform import tempfile import threading +from lit.ShCommands import GlobItem import lit.ShUtil as ShUtil import lit.Test as Test import lit.util from lit.util import to_bytes, to_string +from lit.BooleanExpression import BooleanExpression class InternalShellError(Exception): def __init__(self, command, message): @@ -140,6 +142,17 @@ def executeShCmd(cmd, shenv, results, timeout=0): return (finalExitCode, timeoutInfo) +def expand_glob(arg, cwd): + if isinstance(arg, GlobItem): + return arg.resolve(cwd) + return [arg] + +def expand_glob_expressions(args, cwd): + result = [args[0]] + for arg in args[1:]: + result.extend(expand_glob(arg, cwd)) + return result + def quote_windows_command(seq): """ Reimplement Python's private subprocess.list2cmdline for MSys compatibility @@ -196,6 +209,18 @@ def quote_windows_command(seq): return ''.join(result) +# cmd is export or env +def updateEnv(env, cmd): + arg_idx = 1 + for arg_idx, arg in enumerate(cmd.args[1:]): + # Partition the string into KEY=VALUE. + key, eq, val = arg.partition('=') + # Stop if there was no equals. 
+ if eq == '': + break + env.env[key] = val + cmd.args = cmd.args[arg_idx+1:] + def _executeShCmd(cmd, shenv, results, timeoutHelper): if timeoutHelper.timeoutReached(): # Prevent further recursion if the timeout has been hit @@ -239,11 +264,19 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): if os.path.isabs(newdir): shenv.cwd = newdir else: - shenv.cwd = os.path.join(shenv.cwd, newdir) + shenv.cwd = os.path.realpath(os.path.join(shenv.cwd, newdir)) # The cd builtin always succeeds. If the directory does not exist, the # following Popen calls will fail instead. return 0 + if cmd.commands[0].args[0] == 'export': + if len(cmd.commands) != 1: + raise ValueError("'export' cannot be part of a pipeline") + if len(cmd.commands[0].args) != 2: + raise ValueError("'export' supports only one argument") + updateEnv(shenv, cmd.commands[0]) + return 0 + procs = [] input = subprocess.PIPE stderrTempFiles = [] @@ -260,15 +293,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): # command. There might be multiple envs in a pipeline: # env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env) - arg_idx = 1 - for arg_idx, arg in enumerate(j.args[1:]): - # Partition the string into KEY=VALUE. - key, eq, val = arg.partition('=') - # Stop if there was no equals. - if eq == '': - break - cmd_shenv.env[key] = val - j.args = j.args[arg_idx+1:] + updateEnv(cmd_shenv, j) # Apply the redirections, we use (N,) as a sentinel to indicate stdin, # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or @@ -312,15 +337,19 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): else: if r[2] is None: redir_filename = None - if kAvoidDevNull and r[0] == '/dev/null': + name = expand_glob(r[0], cmd_shenv.cwd) + if len(name) != 1: + raise InternalShellError(j,"Unsupported: glob in redirect expanded to multiple files") + name = name[0] + if kAvoidDevNull and name == '/dev/null': r[2] = tempfile.TemporaryFile(mode=r[1]) - elif kIsWindows and r[0] == '/dev/tty': + elif kIsWindows and name == '/dev/tty': # Simulate /dev/tty on Windows. # "CON" is a special filename for the console. r[2] = open("CON", r[1]) else: # Make sure relative paths are relative to the cwd. - redir_filename = os.path.join(cmd_shenv.cwd, r[0]) + redir_filename = os.path.join(cmd_shenv.cwd, name) r[2] = open(redir_filename, r[1]) # Workaround a Win32 and/or subprocess bug when appending. # @@ -371,6 +400,9 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): named_temp_files.append(f.name) args[i] = f.name + # Expand all glob expressions + args = expand_glob_expressions(args, cmd_shenv.cwd) + # On Windows, do our own command line quoting for better compatibility # with some core utility distributions. if kIsWindows: @@ -686,11 +718,14 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): substitutions = [] substitutions.extend([('%%', '#_MARKER_#')]) substitutions.extend(test.config.substitutions) + tmpName = tmpBase + '.tmp' + baseName = os.path.basename(tmpBase) substitutions.extend([('%s', sourcepath), ('%S', sourcedir), ('%p', sourcedir), ('%{pathsep}', os.pathsep), - ('%t', tmpBase + '.tmp'), + ('%t', tmpName), + ('%basename_t', baseName), ('%T', tmpDir), ('#_MARKER_#', '%')]) @@ -746,14 +781,35 @@ class ParserKind(object): command. TAG: A keyword taking no value. Ex 'END.' - COMMAND: A Keyword taking a list of shell commands. Ex 'RUN:' - LIST: A keyword taking a comma separated list of value. 
Ex 'XFAIL:' + COMMAND: A keyword taking a list of shell commands. Ex 'RUN:' + LIST: A keyword taking a comma-separated list of values. + BOOLEAN_EXPR: A keyword taking a comma-separated list of + boolean expressions. Ex 'XFAIL:' CUSTOM: A keyword with custom parsing semantics. """ TAG = 0 COMMAND = 1 LIST = 2 - CUSTOM = 3 + BOOLEAN_EXPR = 3 + CUSTOM = 4 + + @staticmethod + def allowedKeywordSuffixes(value): + return { ParserKind.TAG: ['.'], + ParserKind.COMMAND: [':'], + ParserKind.LIST: [':'], + ParserKind.BOOLEAN_EXPR: [':'], + ParserKind.CUSTOM: [':', '.'] + } [value] + + @staticmethod + def str(value): + return { ParserKind.TAG: 'TAG', + ParserKind.COMMAND: 'COMMAND', + ParserKind.LIST: 'LIST', + ParserKind.BOOLEAN_EXPR: 'BOOLEAN_EXPR', + ParserKind.CUSTOM: 'CUSTOM' + } [value] class IntegratedTestKeywordParser(object): @@ -765,15 +821,18 @@ class IntegratedTestKeywordParser(object): ParserKind.CUSTOM. """ def __init__(self, keyword, kind, parser=None, initial_value=None): - if not keyword.endswith('.') and not keyword.endswith(':'): - raise ValueError("keyword '%s' must end with either '.' or ':' " - % keyword) - if keyword.endswith('.') and kind in \ - [ParserKind.LIST, ParserKind.COMMAND]: - raise ValueError("Keyword '%s' should end in ':'" % keyword) - - elif keyword.endswith(':') and kind in [ParserKind.TAG]: - raise ValueError("Keyword '%s' should end in '.'" % keyword) + allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind) + if len(keyword) == 0 or keyword[-1] not in allowedSuffixes: + if len(allowedSuffixes) == 1: + raise ValueError("Keyword '%s' of kind '%s' must end in '%s'" + % (keyword, ParserKind.str(kind), + allowedSuffixes[0])) + else: + raise ValueError("Keyword '%s' of kind '%s' must end in " + " one of '%s'" + % (keyword, ParserKind.str(kind), + ' '.join(allowedSuffixes))) + if parser is not None and kind != ParserKind.CUSTOM: raise ValueError("custom parsers can only be specified with " "ParserKind.CUSTOM") @@ -787,9 +846,9 @@ class IntegratedTestKeywordParser(object): self.parser = self._handleCommand elif kind == ParserKind.LIST: self.parser = self._handleList + elif kind == ParserKind.BOOLEAN_EXPR: + self.parser = self._handleBooleanExpr elif kind == ParserKind.TAG: - if not keyword.endswith('.'): - raise ValueError("keyword '%s' should end with '.'" % keyword) self.parser = self._handleTag elif kind == ParserKind.CUSTOM: if parser is None: @@ -799,8 +858,12 @@ class IntegratedTestKeywordParser(object): raise ValueError("Unknown kind '%s'" % kind) def parseLine(self, line_number, line): - self.parsed_lines += [(line_number, line)] - self.value = self.parser(line_number, line, self.value) + try: + self.parsed_lines += [(line_number, line)] + self.value = self.parser(line_number, line, self.value) + except ValueError as e: + raise ValueError(str(e) + ("\nin %s directive on test line %d" % + (self.keyword, line_number))) def getValue(self): return self.value @@ -841,12 +904,38 @@ class IntegratedTestKeywordParser(object): output.extend([s.strip() for s in line.split(',')]) return output + @staticmethod + def _handleBooleanExpr(line_number, line, output): + """A parser for BOOLEAN_EXPR type keywords""" + if output is None: + output = [] + output.extend([s.strip() for s in line.split(',')]) + # Evaluate each expression to verify syntax. + # We don't want any results, just the raised ValueError. 
+ for s in output: + if s != '*': + BooleanExpression.evaluate(s, []) + return output + + @staticmethod + def _handleRequiresAny(line_number, line, output): + """A custom parser to transform REQUIRES-ANY: into REQUIRES:""" + + # Extract the conditions specified in REQUIRES-ANY: as written. + conditions = [] + IntegratedTestKeywordParser._handleList(line_number, line, conditions) + + # Output a `REQUIRES: a || b || c` expression in its place. + expression = ' || '.join(conditions) + IntegratedTestKeywordParser._handleBooleanExpr(line_number, + expression, output) + return output def parseIntegratedTestScript(test, additional_parsers=[], require_script=True): """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test script and extract the lines to 'RUN' as well as 'XFAIL' and 'REQUIRES' - 'REQUIRES-ANY' and 'UNSUPPORTED' information. + and 'UNSUPPORTED' information. If additional parsers are specified then the test is also scanned for the keywords they specify and all matches are passed to the custom parser. @@ -855,26 +944,26 @@ def parseIntegratedTestScript(test, additional_parsers=[], may be returned. This can be used for test formats where the actual script is optional or ignored. """ - # Collect the test lines from the script. - sourcepath = test.getSourcePath() + + # Install the built-in keyword parsers. script = [] - requires = [] - requires_any = [] - unsupported = [] builtin_parsers = [ IntegratedTestKeywordParser('RUN:', ParserKind.COMMAND, initial_value=script), - IntegratedTestKeywordParser('XFAIL:', ParserKind.LIST, + IntegratedTestKeywordParser('XFAIL:', ParserKind.BOOLEAN_EXPR, initial_value=test.xfails), - IntegratedTestKeywordParser('REQUIRES:', ParserKind.LIST, - initial_value=requires), - IntegratedTestKeywordParser('REQUIRES-ANY:', ParserKind.LIST, - initial_value=requires_any), - IntegratedTestKeywordParser('UNSUPPORTED:', ParserKind.LIST, - initial_value=unsupported), + IntegratedTestKeywordParser('REQUIRES:', ParserKind.BOOLEAN_EXPR, + initial_value=test.requires), + IntegratedTestKeywordParser('REQUIRES-ANY:', ParserKind.CUSTOM, + IntegratedTestKeywordParser._handleRequiresAny, + initial_value=test.requires), + IntegratedTestKeywordParser('UNSUPPORTED:', ParserKind.BOOLEAN_EXPR, + initial_value=test.unsupported), IntegratedTestKeywordParser('END.', ParserKind.TAG) ] keyword_parsers = {p.keyword: p for p in builtin_parsers} + + # Install user-defined additional parsers. for parser in additional_parsers: if not isinstance(parser, IntegratedTestKeywordParser): raise ValueError('additional parser must be an instance of ' @@ -883,7 +972,9 @@ def parseIntegratedTestScript(test, additional_parsers=[], raise ValueError("Parser for keyword '%s' already exists" % parser.keyword) keyword_parsers[parser.keyword] = parser - + + # Collect the test lines from the script. 
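# (Editor's sketch, not part of the patch.) With the parsers installed above,
# a legacy line such as
#   REQUIRES-ANY: feat-a, feat-b
# is rewritten by _handleRequiresAny into the boolean form
#   REQUIRES: feat-a || feat-b
# and accumulated into test.requires next to ordinary REQUIRES lines, so the
# old keyword keeps working without a dedicated code path below.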
+ sourcepath = test.getSourcePath() for line_number, command_type, ln in \ parseIntegratedTestScriptCommands(sourcepath, keyword_parsers.keys()): @@ -901,46 +992,30 @@ def parseIntegratedTestScript(test, additional_parsers=[], return lit.Test.Result(Test.UNRESOLVED, "Test has unterminated run lines (with '\\')") - # Check that we have the required features: - missing_required_features = [f for f in requires - if f not in test.config.available_features] + # Enforce REQUIRES: + missing_required_features = test.getMissingRequiredFeatures() if missing_required_features: msg = ', '.join(missing_required_features) return lit.Test.Result(Test.UNSUPPORTED, - "Test requires the following features: %s" - % msg) - requires_any_features = [f for f in requires_any - if f in test.config.available_features] - if requires_any and not requires_any_features: - msg = ' ,'.join(requires_any) - return lit.Test.Result(Test.UNSUPPORTED, - "Test requires any of the following features: " - "%s" % msg) - unsupported_features = [f for f in unsupported - if f in test.config.available_features] + "Test requires the following unavailable " + "features: %s" % msg) + + # Enforce UNSUPPORTED: + unsupported_features = test.getUnsupportedFeatures() if unsupported_features: msg = ', '.join(unsupported_features) return lit.Test.Result( Test.UNSUPPORTED, - "Test is unsupported with the following features: %s" % msg) + "Test does not support the following features " + "and/or targets: %s" % msg) + + # Enforce limit_to_features. + if not test.isWithinFeatureLimits(): + msg = ', '.join(test.config.limit_to_features) + return lit.Test.Result(Test.UNSUPPORTED, + "Test does not require any of the features " + "specified in limit_to_features: %s" % msg) - unsupported_targets = [f for f in unsupported - if f in test.suite.config.target_triple] - if unsupported_targets: - return lit.Test.Result( - Test.UNSUPPORTED, - "Test is unsupported with the following triple: %s" % ( - test.suite.config.target_triple,)) - - if test.config.limit_to_features: - # Check that we have one of the limit_to_features features in requires. - limit_to_features_tests = [f for f in test.config.limit_to_features - if f in requires] - if not limit_to_features_tests: - msg = ', '.join(test.config.limit_to_features) - return lit.Test.Result( - Test.UNSUPPORTED, - "Test requires one of the limit_to_features features %s" % msg) return script diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py index 717b53c67fbeb..c729ec060ace7 100644 --- a/utils/lit/lit/TestingConfig.py +++ b/utils/lit/lit/TestingConfig.py @@ -106,7 +106,7 @@ class TestingConfig: environment, substitutions, unsupported, test_exec_root, test_source_root, excludes, available_features, pipefail, limit_to_features = [], - is_early = False): + is_early = False, parallelism_group = ""): self.parent = parent self.name = str(name) self.suffixes = set(suffixes) @@ -125,6 +125,7 @@ class TestingConfig: self.limit_to_features = set(limit_to_features) # Whether the suite should be tested early in a given run. 
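# (Editor's sketch, not part of the patch; the lit.cfg usage is an assumption
# based on how run.py builds its semaphores.) The parallelism_group stored a
# couple of lines below is looked up in lit_config.parallelism_groups, so a
# suite might opt in roughly like this (group name and limit are
# illustrative only):
#   config.parallelism_group = 'darwin-asan'
#   lit_config.parallelism_groups['darwin-asan'] = 3   # at most 3 at a time
# A callable is also accepted; execute_test() calls pg(test) in that case.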
self.is_early = bool(is_early) + self.parallelism_group = parallelism_group def finish(self, litConfig): """finish() - Finish this config object, after loading is complete.""" diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py index ac3066eea7252..689a2d55bcea7 100755 --- a/utils/lit/lit/main.py +++ b/utils/lit/lit/main.py @@ -259,6 +259,14 @@ def main_with_tmp(builtinParameters): help=("Only run tests with paths matching the given " "regular expression"), action="store", default=None) + selection_group.add_argument("--num-shards", dest="numShards", metavar="M", + help="Split testsuite into M pieces and only run one", + action="store", type=int, + default=os.environ.get("LIT_NUM_SHARDS")) + selection_group.add_argument("--run-shard", dest="runShard", metavar="N", + help="Run shard #N of the testsuite", + action="store", type=int, + default=os.environ.get("LIT_RUN_SHARD")) debug_group = parser.add_argument_group("Debug and Experimental Options") debug_group.add_argument("--debug", @@ -270,12 +278,15 @@ def main_with_tmp(builtinParameters): debug_group.add_argument("--show-tests", dest="showTests", help="Show all discovered tests", action="store_true", default=False) - debug_group.add_argument("--use-processes", dest="useProcesses", + debug_group.add_argument("--use-process-pool", dest="executionStrategy", + help="Run tests in parallel with a process pool", + action="store_const", const="PROCESS_POOL") + debug_group.add_argument("--use-processes", dest="executionStrategy", help="Run tests in parallel with processes (not threads)", - action="store_true", default=True) - debug_group.add_argument("--use-threads", dest="useProcesses", + action="store_const", const="PROCESSES") + debug_group.add_argument("--use-threads", dest="executionStrategy", help="Run tests in parallel with threads (not processes)", - action="store_false", default=True) + action="store_const", const="THREADS") opts = parser.parse_args() args = opts.test_paths @@ -290,6 +301,9 @@ def main_with_tmp(builtinParameters): if opts.numThreads is None: opts.numThreads = lit.util.detectCPUs() + if opts.executionStrategy is None: + opts.executionStrategy = 'PROCESS_POOL' + if opts.maxFailures == 0: parser.error("Setting --max-failures to 0 does not have any effect.") @@ -327,7 +341,8 @@ def main_with_tmp(builtinParameters): params = userParams, config_prefix = opts.configPrefix, maxIndividualTestTime = maxIndividualTestTime, - maxFailures = opts.maxFailures) + maxFailures = opts.maxFailures, + parallelism_groups = {}) # Perform test discovery. run = lit.run.Run(litConfig, @@ -399,6 +414,29 @@ def main_with_tmp(builtinParameters): else: run.tests.sort(key = lambda t: (not t.isEarlyTest(), t.getFullName())) + # Then optionally restrict our attention to a shard of the tests. + if (opts.numShards is not None) or (opts.runShard is not None): + if (opts.numShards is None) or (opts.runShard is None): + parser.error("--num-shards and --run-shard must be used together") + if opts.numShards <= 0: + parser.error("--num-shards must be positive") + if (opts.runShard < 1) or (opts.runShard > opts.numShards): + parser.error("--run-shard must be between 1 and --num-shards (inclusive)") + num_tests = len(run.tests) + # Note: user views tests and shard numbers counting from 1. + test_ixs = range(opts.runShard - 1, num_tests, opts.numShards) + run.tests = [run.tests[i] for i in test_ixs] + # Generate a preview of the first few test indices in the shard + # to accompany the arithmetic expression, for clarity. 
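# (Editor's note, not part of the patch.) Worked example of the stride above:
# with 5 discovered tests and --num-shards 3,
#   --run-shard 1 -> range(0, 5, 3) -> indices [0, 3] -> tests [1, 4]
#   --run-shard 2 -> range(1, 5, 3) -> indices [1, 4] -> tests [2, 5]
#   --run-shard 3 -> range(2, 5, 3) -> indices [2]    -> test  [3]
# which is exactly what the CHECK-SHARD* lines in tests/selecting.py expect.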
+ preview_len = 3 + ix_preview = ", ".join([str(i+1) for i in test_ixs[:preview_len]]) + if len(test_ixs) > preview_len: + ix_preview += ", ..." + litConfig.note('Selecting shard %d/%d = size %d/%d = tests #(%d*k)+%d = [%s]' % + (opts.runShard, opts.numShards, + len(run.tests), num_tests, + opts.numShards, opts.runShard, ix_preview)) + # Finally limit the number of tests, if desired. if opts.maxTests is not None: run.tests = run.tests[:opts.maxTests] @@ -449,7 +487,7 @@ def main_with_tmp(builtinParameters): display = TestingProgressDisplay(opts, len(run.tests), progressBar) try: run.execute_tests(display, opts.numThreads, opts.maxTime, - opts.useProcesses) + opts.executionStrategy) except KeyboardInterrupt: sys.exit(2) display.finish() diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py index f7e84d316a7cd..14d8ec98490e8 100644 --- a/utils/lit/lit/run.py +++ b/utils/lit/lit/run.py @@ -1,4 +1,5 @@ import os +import sys import threading import time import traceback @@ -84,11 +85,13 @@ class Tester(object): def run_test(self, test_index): test = self.run_instance.tests[test_index] try: - self.run_instance.execute_test(test) + execute_test(test, self.run_instance.lit_config, + self.run_instance.parallelism_semaphores) except KeyboardInterrupt: # This is a sad hack. Unfortunately subprocess goes # bonkers with ctrl-c and we start forking merrily. print('\nCtrl-C detected, goodbye.') + sys.stdout.flush() os.kill(0,9) self.consumer.update(test_index, test) @@ -167,6 +170,44 @@ class _Display(object): def handleFailures(provider, consumer, maxFailures): consumer.display = _Display(consumer.display, provider, maxFailures) +def execute_test(test, lit_config, parallelism_semaphores): + """Execute one test""" + pg = test.config.parallelism_group + if callable(pg): + pg = pg(test) + + result = None + semaphore = None + try: + if pg: + semaphore = parallelism_semaphores[pg] + if semaphore: + semaphore.acquire() + start_time = time.time() + result = test.config.test_format.execute(test, lit_config) + # Support deprecated result from execute() which returned the result + # code and additional output as a tuple. + if isinstance(result, tuple): + code, output = result + result = lit.Test.Result(code, output) + elif not isinstance(result, lit.Test.Result): + raise ValueError("unexpected result from test execution") + result.elapsed = time.time() - start_time + except KeyboardInterrupt: + raise + except: + if lit_config.debug: + raise + output = 'Exception during script execution:\n' + output += traceback.format_exc() + output += '\n' + result = lit.Test.Result(lit.Test.UNRESOLVED, output) + finally: + if semaphore: + semaphore.release() + + test.setResult(result) + class Run(object): """ This class represents a concrete, configured testing run. @@ -177,33 +218,10 @@ class Run(object): self.tests = tests def execute_test(self, test): - result = None - start_time = time.time() - try: - result = test.config.test_format.execute(test, self.lit_config) - - # Support deprecated result from execute() which returned the result - # code and additional output as a tuple. 
- if isinstance(result, tuple): - code, output = result - result = lit.Test.Result(code, output) - elif not isinstance(result, lit.Test.Result): - raise ValueError("unexpected result from test execution") - except KeyboardInterrupt: - raise - except: - if self.lit_config.debug: - raise - output = 'Exception during script execution:\n' - output += traceback.format_exc() - output += '\n' - result = lit.Test.Result(lit.Test.UNRESOLVED, output) - result.elapsed = time.time() - start_time - - test.setResult(result) + return execute_test(test, self.lit_config, self.parallelism_semaphores) def execute_tests(self, display, jobs, max_time=None, - use_processes=False): + execution_strategy=None): """ execute_tests(display, jobs, [max_time]) @@ -225,12 +243,21 @@ class Run(object): be given an UNRESOLVED result. """ + if execution_strategy == 'PROCESS_POOL': + self.execute_tests_with_mp_pool(display, jobs, max_time) + return + # FIXME: Standardize on the PROCESS_POOL execution strategy and remove + # the other two strategies. + + use_processes = execution_strategy == 'PROCESSES' + # Choose the appropriate parallel execution implementation. consumer = None if jobs != 1 and use_processes and multiprocessing: try: task_impl = multiprocessing.Process queue_impl = multiprocessing.Queue + sem_impl = multiprocessing.Semaphore canceled_flag = multiprocessing.Value('i', 0) consumer = MultiprocessResultsConsumer(self, display, jobs) except: @@ -242,15 +269,19 @@ class Run(object): if not consumer: task_impl = threading.Thread queue_impl = queue.Queue + sem_impl = threading.Semaphore canceled_flag = LockedValue(0) consumer = ThreadResultsConsumer(display) + self.parallelism_semaphores = {k: sem_impl(v) + for k, v in self.lit_config.parallelism_groups.items()} + # Create the test provider. provider = TestProvider(queue_impl, canceled_flag) handleFailures(provider, consumer, self.lit_config.maxFailures) - # Queue the tests outside the main thread because we can't guarantee - # that we can put() all the tests without blocking: + # Putting tasks into the threading or multiprocessing Queue may block, + # so do it in a separate thread. # https://docs.python.org/2/library/multiprocessing.html # e.g: On Mac OS X, we will hang if we put 2^15 elements in the queue # without taking any out. @@ -303,3 +334,140 @@ class Run(object): # Wait for all the tasks to complete. for t in tasks: t.join() + + def execute_tests_with_mp_pool(self, display, jobs, max_time=None): + # Don't do anything if we aren't going to run any tests. + if not self.tests or jobs == 0: + return + + # Set up semaphores to limit parallelism of certain classes of tests. + # For example, some ASan tests require lots of virtual memory and run + # faster with less parallelism on OS X. + self.parallelism_semaphores = \ + {k: multiprocessing.Semaphore(v) for k, v in + self.lit_config.parallelism_groups.items()} + + # Install a console-control signal handler on Windows. + if win32api is not None: + def console_ctrl_handler(type): + print('\nCtrl-C detected, terminating.') + pool.terminate() + pool.join() + os.kill(0,9) + return True + win32api.SetConsoleCtrlHandler(console_ctrl_handler, True) + + # Save the display object on the runner so that we can update it from + # our task completion callback. + self.display = display + + # We need to issue many wait calls, so compute the final deadline and + # subtract time.time() from that as we go along. + deadline = None + if max_time: + deadline = time.time() + max_time + + # Start a process pool. 
Copy over the data shared between all test runs. + pool = multiprocessing.Pool(jobs, worker_initializer, + (self.lit_config, + self.parallelism_semaphores)) + + try: + self.failure_count = 0 + self.hit_max_failures = False + async_results = [pool.apply_async(worker_run_one_test, + args=(test_index, test), + callback=self.consume_test_result) + for test_index, test in enumerate(self.tests)] + + # Wait for all results to come in. The callback that runs in the + # parent process will update the display. + for a in async_results: + if deadline: + a.wait(deadline - time.time()) + else: + # Python condition variables cannot be interrupted unless + # they have a timeout. This can make lit unresponsive to + # KeyboardInterrupt, so do a busy wait with a timeout. + while not a.ready(): + a.wait(1) + if not a.successful(): + a.get() # Exceptions raised here come from the worker. + if self.hit_max_failures: + break + finally: + # Stop the workers and wait for any straggling results to come in + # if we exited without waiting on every async result. + pool.terminate() + pool.join() + + # Mark any tests that weren't run as UNRESOLVED. + for test in self.tests: + if test.result is None: + test.setResult(lit.Test.Result(lit.Test.UNRESOLVED, '', 0.0)) + + def consume_test_result(self, pool_result): + """Test completion callback for worker_run_one_test + + Updates the test result status in the parent process. Each task in the + pool returns the test index and the result, and we use the index to look + up the original test object. Also updates the progress bar as tasks + complete. + """ + # Don't add any more test results after we've hit the maximum failure + # count. Otherwise we're racing with the main thread, which is going + # to terminate the process pool soon. + if self.hit_max_failures: + return + + (test_index, test_with_result) = pool_result + # Update the parent process copy of the test. This includes the result, + # XFAILS, REQUIRES, and UNSUPPORTED statuses. + assert self.tests[test_index].file_path == test_with_result.file_path, \ + "parent and child disagree on test path" + self.tests[test_index] = test_with_result + self.display.update(test_with_result) + + # If we've finished all the tests or too many tests have failed, notify + # the main thread that we've stopped testing. + self.failure_count += (test_with_result.result.code == lit.Test.FAIL) + if self.lit_config.maxFailures and \ + self.failure_count == self.lit_config.maxFailures: + self.hit_max_failures = True + +child_lit_config = None +child_parallelism_semaphores = None + +def worker_initializer(lit_config, parallelism_semaphores): + """Copy expensive repeated data into worker processes""" + global child_lit_config + child_lit_config = lit_config + global child_parallelism_semaphores + child_parallelism_semaphores = parallelism_semaphores + +def worker_run_one_test(test_index, test): + """Run one test in a multiprocessing.Pool + + Side effects in this function and functions it calls are not visible in the + main lit process. + + Arguments and results of this function are pickled, so they should be cheap + to copy. For efficiency, we copy all data needed to execute all tests into + each worker and store it in the child_* global variables. This reduces the + cost of each task. + + Returns an index and a Result, which the parent process uses to update + the display. + """ + try: + execute_test(test, child_lit_config, child_parallelism_semaphores) + return (test_index, test) + except KeyboardInterrupt as e: + # This is a sad hack. 
Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. + print('\nCtrl-C detected, goodbye.') + traceback.print_exc() + sys.stdout.flush() + os.kill(0,9) + except: + traceback.print_exc() diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py index be37998c6f16c..104e9dac464d7 100644 --- a/utils/lit/lit/util.py +++ b/utils/lit/lit/util.py @@ -10,6 +10,8 @@ import threading def to_bytes(str): # Encode to UTF-8 to get binary data. + if isinstance(str, bytes): + return str return str.encode('utf-8') def to_string(bytes): @@ -200,6 +202,8 @@ def executeCommand(command, cwd=None, env=None, input=None, timeout=0): If the timeout is hit an ``ExecuteCommandTimeoutException`` is raised. """ + if input is not None: + input = to_bytes(input) p = subprocess.Popen(command, cwd=cwd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, diff --git a/utils/lit/tests/Inputs/shtest-format/requires-missing.txt b/utils/lit/tests/Inputs/shtest-format/requires-missing.txt index 9e6648d8b8f08..d643e57edcad2 100644 --- a/utils/lit/tests/Inputs/shtest-format/requires-missing.txt +++ b/utils/lit/tests/Inputs/shtest-format/requires-missing.txt @@ -1,2 +1,5 @@ -RUN: true -REQUIRES: a-missing-feature +# REQUIRES with a false clause. Test should not run. +REQUIRES: true +REQUIRES: a-missing-feature, true +REQUIRES: true +RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/requires-present.txt b/utils/lit/tests/Inputs/shtest-format/requires-present.txt index 064f7074a76ea..9fcbdca69be35 100644 --- a/utils/lit/tests/Inputs/shtest-format/requires-present.txt +++ b/utils/lit/tests/Inputs/shtest-format/requires-present.txt @@ -1,2 +1,4 @@ +# REQUIRES with only true clauses. Test should run. +REQUIRES: a-present-feature, true, !not-true +REQUIRES: true RUN: true -REQUIRES: a-present-feature diff --git a/utils/lit/tests/Inputs/shtest-format/requires-star.txt b/utils/lit/tests/Inputs/shtest-format/requires-star.txt new file mode 100644 index 0000000000000..5566d8b15b074 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/requires-star.txt @@ -0,0 +1,3 @@ +# '*' only works in XFAIL +REQUIRES: * +RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/requires-triple.txt b/utils/lit/tests/Inputs/shtest-format/requires-triple.txt new file mode 100644 index 0000000000000..6470bf4041457 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/requires-triple.txt @@ -0,0 +1,3 @@ +# REQUIRES line that uses target triple, which doesn't work. Test should not run +REQUIRES: x86_64 +RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/unsupported-expr-false.txt b/utils/lit/tests/Inputs/shtest-format/unsupported-expr-false.txt new file mode 100644 index 0000000000000..00c6160a367c1 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/unsupported-expr-false.txt @@ -0,0 +1,9 @@ +# UNSUPPORTED with only false clauses. Test should run. +UNSUPPORTED: false +UNSUPPORTED: false, not-true +UNSUPPORTED: false +UNSUPPORTED: still-not-true +UNSUPPORTED: false +UNSUPPORTED: false +UNSUPPORTED: false +RUN: true diff --git a/utils/lit/tests/Inputs/shtest-format/unsupported-expr-true.txt b/utils/lit/tests/Inputs/shtest-format/unsupported-expr-true.txt new file mode 100644 index 0000000000000..f48ba7b2c2d22 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/unsupported-expr-true.txt @@ -0,0 +1,4 @@ +# UNSUPPORTED with a true clause. Test should not run. 
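# (Editor's note, not part of the test.) The clause that makes this file a
# "true" case appears to be "_64-unk && a-present-feature": a-present-feature
# is in the available features, and "_64-unk" seems to match a substring of
# the suite's x86_64-unknown... target triple; a single true clause is enough
# to mark the test unsupported.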
+UNSUPPORTED: false +UNSUPPORTED: false, false, false, _64-unk && a-present-feature, false +RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/unsupported-star.txt b/utils/lit/tests/Inputs/shtest-format/unsupported-star.txt new file mode 100644 index 0000000000000..16630207dacb9 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/unsupported-star.txt @@ -0,0 +1,3 @@ +# '*' only works in XFAIL +UNSUPPORTED: * +RUN: false diff --git a/utils/lit/tests/Inputs/shtest-format/xfail-expr-false.txt b/utils/lit/tests/Inputs/shtest-format/xfail-expr-false.txt new file mode 100644 index 0000000000000..83b0de1621d08 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/xfail-expr-false.txt @@ -0,0 +1,3 @@ +# XFAIL with only false clauses. Test should run. +XFAIL: false, a-missing-feature || ! a-present-feature || ! x86_64, false +RUN: true diff --git a/utils/lit/tests/Inputs/shtest-format/xfail-expr-true.txt b/utils/lit/tests/Inputs/shtest-format/xfail-expr-true.txt new file mode 100644 index 0000000000000..3c197484897e8 --- /dev/null +++ b/utils/lit/tests/Inputs/shtest-format/xfail-expr-true.txt @@ -0,0 +1,4 @@ +# XFAIL with a true clause. Test should not run. +XFAIL: false +XFAIL: false, a-present-feature && ! a-missing-feature && x86_64 +RUN: false diff --git a/utils/lit/tests/Inputs/test-data/dummy_format.py b/utils/lit/tests/Inputs/test-data/dummy_format.py new file mode 100644 index 0000000000000..93e48eeb83960 --- /dev/null +++ b/utils/lit/tests/Inputs/test-data/dummy_format.py @@ -0,0 +1,38 @@ +import os +try: + import ConfigParser +except ImportError: + import configparser as ConfigParser + +import lit.formats +import lit.Test + +class DummyFormat(lit.formats.FileBasedTest): + def execute(self, test, lit_config): + # In this dummy format, expect that each test file is actually just a + # .ini format dump of the results to report. + + source_path = test.getSourcePath() + + cfg = ConfigParser.ConfigParser() + cfg.read(source_path) + + # Create the basic test result. + result_code = cfg.get('global', 'result_code') + result_output = cfg.get('global', 'result_output') + result = lit.Test.Result(getattr(lit.Test, result_code), + result_output) + + # Load additional metrics. + for key,value_str in cfg.items('results'): + value = eval(value_str) + if isinstance(value, int): + metric = lit.Test.IntMetricValue(value) + elif isinstance(value, float): + metric = lit.Test.RealMetricValue(value) + else: + raise RuntimeError("unsupported result type") + result.addMetric(key, metric) + + return result + diff --git a/utils/lit/tests/Inputs/test-data/lit.cfg b/utils/lit/tests/Inputs/test-data/lit.cfg index f5aba7b217748..0191cc2188843 100644 --- a/utils/lit/tests/Inputs/test-data/lit.cfg +++ b/utils/lit/tests/Inputs/test-data/lit.cfg @@ -1,44 +1,10 @@ -import os -try: - import ConfigParser -except ImportError: - import configparser as ConfigParser - -import lit.formats -import lit.Test - -class DummyFormat(lit.formats.FileBasedTest): - def execute(self, test, lit_config): - # In this dummy format, expect that each test file is actually just a - # .ini format dump of the results to report. - - source_path = test.getSourcePath() - - cfg = ConfigParser.ConfigParser() - cfg.read(source_path) - - # Create the basic test result. - result_code = cfg.get('global', 'result_code') - result_output = cfg.get('global', 'result_output') - result = lit.Test.Result(getattr(lit.Test, result_code), - result_output) - - # Load additional metrics. 
- for key,value_str in cfg.items('results'): - value = eval(value_str) - if isinstance(value, int): - metric = lit.Test.IntMetricValue(value) - elif isinstance(value, float): - metric = lit.Test.RealMetricValue(value) - else: - raise RuntimeError("unsupported result type") - result.addMetric(key, metric) - - return result +import site +site.addsitedir(os.path.dirname(__file__)) +import dummy_format config.name = 'test-data' config.suffixes = ['.ini'] -config.test_format = DummyFormat() +config.test_format = dummy_format.DummyFormat() config.test_source_root = None config.test_exec_root = None config.target_triple = None diff --git a/utils/lit/tests/boolean-parsing.py b/utils/lit/tests/boolean-parsing.py new file mode 100644 index 0000000000000..372a94d233234 --- /dev/null +++ b/utils/lit/tests/boolean-parsing.py @@ -0,0 +1,4 @@ +# Test the boolean expression parser +# used for REQUIRES and UNSUPPORTED and XFAIL + +# RUN: %{python} -m lit.BooleanExpression diff --git a/utils/lit/tests/selecting.py b/utils/lit/tests/selecting.py new file mode 100644 index 0000000000000..72d6fbabdc932 --- /dev/null +++ b/utils/lit/tests/selecting.py @@ -0,0 +1,90 @@ +# RUN: %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-BASIC %s +# CHECK-BASIC: Testing: 5 tests + + +# Check that regex-filtering works +# +# RUN: %{lit} --filter 'o[a-z]e' %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER %s +# CHECK-FILTER: Testing: 2 of 5 tests + + +# Check that maximum counts work +# +# RUN: %{lit} --max-tests 3 %{inputs}/discovery | FileCheck --check-prefix=CHECK-MAX %s +# CHECK-MAX: Testing: 3 of 5 tests + + +# Check that sharding partitions the testsuite in a way that distributes the +# rounding error nicely (i.e. 5/3 => 2 2 1, not 1 1 3 or whatever) +# +# RUN: %{lit} --num-shards 3 --run-shard 1 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD0-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD0-OUT < %t.out %s +# CHECK-SHARD0-ERR: note: Selecting shard 1/3 = size 2/5 = tests #(3*k)+1 = [1, 4] +# CHECK-SHARD0-OUT: Testing: 2 of 5 tests +# +# RUN: %{lit} --num-shards 3 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD1-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD1-OUT < %t.out %s +# CHECK-SHARD1-ERR: note: Selecting shard 2/3 = size 2/5 = tests #(3*k)+2 = [2, 5] +# CHECK-SHARD1-OUT: Testing: 2 of 5 tests +# +# RUN: %{lit} --num-shards 3 --run-shard 3 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD2-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD2-OUT < %t.out %s +# CHECK-SHARD2-ERR: note: Selecting shard 3/3 = size 1/5 = tests #(3*k)+3 = [3] +# CHECK-SHARD2-OUT: Testing: 1 of 5 tests + + +# Check that sharding via env vars works. 
+# +# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=1 %{lit} %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD0-ENV-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD0-ENV-OUT < %t.out %s +# CHECK-SHARD0-ENV-ERR: note: Selecting shard 1/3 = size 2/5 = tests #(3*k)+1 = [1, 4] +# CHECK-SHARD0-ENV-OUT: Testing: 2 of 5 tests +# +# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=2 %{lit} %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD1-ENV-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD1-ENV-OUT < %t.out %s +# CHECK-SHARD1-ENV-ERR: note: Selecting shard 2/3 = size 2/5 = tests #(3*k)+2 = [2, 5] +# CHECK-SHARD1-ENV-OUT: Testing: 2 of 5 tests +# +# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=3 %{lit} %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD2-ENV-ERR < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD2-ENV-OUT < %t.out %s +# CHECK-SHARD2-ENV-ERR: note: Selecting shard 3/3 = size 1/5 = tests #(3*k)+3 = [3] +# CHECK-SHARD2-ENV-OUT: Testing: 1 of 5 tests + + +# Check that providing more shards than tests results in 1 test per shard +# until we run out, then 0. +# +# RUN: %{lit} --num-shards 100 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR1 < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT1 < %t.out %s +# CHECK-SHARD-BIG-ERR1: note: Selecting shard 2/100 = size 1/5 = tests #(100*k)+2 = [2] +# CHECK-SHARD-BIG-OUT1: Testing: 1 of 5 tests +# +# RUN: %{lit} --num-shards 100 --run-shard 6 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR2 < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT2 < %t.out %s +# CHECK-SHARD-BIG-ERR2: note: Selecting shard 6/100 = size 0/5 = tests #(100*k)+6 = [] +# CHECK-SHARD-BIG-OUT2: Testing: 0 of 5 tests +# +# RUN: %{lit} --num-shards 100 --run-shard 50 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR3 < %t.err %s +# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT3 < %t.out %s +# CHECK-SHARD-BIG-ERR3: note: Selecting shard 50/100 = size 0/5 = tests #(100*k)+50 = [] +# CHECK-SHARD-BIG-OUT3: Testing: 0 of 5 tests + + +# Check that range constraints are enforced +# +# RUN: not %{lit} --num-shards 0 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD-ERR < %t.err %s +# CHECK-SHARD-ERR: error: --num-shards must be positive +# +# RUN: not %{lit} --num-shards 3 --run-shard 4 %{inputs}/discovery >%t.out 2>%t.err +# RUN: FileCheck --check-prefix=CHECK-SHARD-ERR2 < %t.err %s +# CHECK-SHARD-ERR2: error: --run-shard must be between 1 and --num-shards (inclusive) diff --git a/utils/lit/tests/shtest-format.py b/utils/lit/tests/shtest-format.py index 20884f8c4854d..37e3e1c12629d 100644 --- a/utils/lit/tests/shtest-format.py +++ b/utils/lit/tests/shtest-format.py @@ -50,7 +50,14 @@ # CHECK: PASS: shtest-format :: requires-any-present.txt # CHECK: UNSUPPORTED: shtest-format :: requires-missing.txt # CHECK: PASS: shtest-format :: requires-present.txt +# CHECK: UNRESOLVED: shtest-format :: requires-star.txt +# CHECK: UNSUPPORTED: shtest-format :: requires-triple.txt +# CHECK: PASS: shtest-format :: unsupported-expr-false.txt +# CHECK: UNSUPPORTED: shtest-format :: unsupported-expr-true.txt +# CHECK: UNRESOLVED: shtest-format :: unsupported-star.txt # CHECK: UNSUPPORTED: shtest-format :: unsupported_dir/some-test.txt +# CHECK: PASS: shtest-format :: xfail-expr-false.txt 
+# CHECK: XFAIL: shtest-format :: xfail-expr-true.txt # CHECK: XFAIL: shtest-format :: xfail-feature.txt # CHECK: XFAIL: shtest-format :: xfail-target.txt # CHECK: XFAIL: shtest-format :: xfail.txt @@ -70,9 +77,9 @@ # CHECK: shtest-format :: external_shell/fail_with_bad_encoding.txt # CHECK: shtest-format :: fail.txt -# CHECK: Expected Passes : 5 -# CHECK: Expected Failures : 3 -# CHECK: Unsupported Tests : 3 -# CHECK: Unresolved Tests : 1 +# CHECK: Expected Passes : 7 +# CHECK: Expected Failures : 4 +# CHECK: Unsupported Tests : 5 +# CHECK: Unresolved Tests : 3 # CHECK: Unexpected Passes : 1 # CHECK: Unexpected Failures: 3 diff --git a/utils/lit/tests/unit/TestRunner.py b/utils/lit/tests/unit/TestRunner.py index ff11834fed7ef..79cc10f7e14d6 100644 --- a/utils/lit/tests/unit/TestRunner.py +++ b/utils/lit/tests/unit/TestRunner.py @@ -89,7 +89,7 @@ class TestIntegratedTestKeywordParser(unittest.TestCase): parsers = self.make_parsers() self.parse_test(parsers) list_parser = self.get_parser(parsers, 'MY_LIST:') - self.assertItemsEqual(list_parser.getValue(), + self.assertEqual(list_parser.getValue(), ['one', 'two', 'three', 'four']) def test_commands(self): @@ -106,8 +106,65 @@ class TestIntegratedTestKeywordParser(unittest.TestCase): self.parse_test(parsers) custom_parser = self.get_parser(parsers, 'MY_CUSTOM:') value = custom_parser.getValue() - self.assertItemsEqual(value, ['a', 'b', 'c']) + self.assertEqual(value, ['a', 'b', 'c']) + def test_bad_keywords(self): + def custom_parse(line_number, line, output): + return output + + try: + IntegratedTestKeywordParser("TAG_NO_SUFFIX", ParserKind.TAG), + self.fail("TAG_NO_SUFFIX failed to raise an exception") + except ValueError as e: + pass + except BaseException as e: + self.fail("TAG_NO_SUFFIX raised the wrong exception: %r" % e) + + try: + IntegratedTestKeywordParser("TAG_WITH_COLON:", ParserKind.TAG), + self.fail("TAG_WITH_COLON: failed to raise an exception") + except ValueError as e: + pass + except BaseException as e: + self.fail("TAG_WITH_COLON: raised the wrong exception: %r" % e) + + try: + IntegratedTestKeywordParser("LIST_WITH_DOT.", ParserKind.LIST), + self.fail("LIST_WITH_DOT. failed to raise an exception") + except ValueError as e: + pass + except BaseException as e: + self.fail("LIST_WITH_DOT. raised the wrong exception: %r" % e) + + try: + IntegratedTestKeywordParser("CUSTOM_NO_SUFFIX", + ParserKind.CUSTOM, custom_parse), + self.fail("CUSTOM_NO_SUFFIX failed to raise an exception") + except ValueError as e: + pass + except BaseException as e: + self.fail("CUSTOM_NO_SUFFIX raised the wrong exception: %r" % e) + + # Both '.' and ':' are allowed for CUSTOM keywords. + try: + IntegratedTestKeywordParser("CUSTOM_WITH_DOT.", + ParserKind.CUSTOM, custom_parse), + except BaseException as e: + self.fail("CUSTOM_WITH_DOT. 
raised an exception: %r" % e) + try: + IntegratedTestKeywordParser("CUSTOM_WITH_COLON:", + ParserKind.CUSTOM, custom_parse), + except BaseException as e: + self.fail("CUSTOM_WITH_COLON: raised an exception: %r" % e) + + try: + IntegratedTestKeywordParser("CUSTOM_NO_PARSER:", + ParserKind.CUSTOM), + self.fail("CUSTOM_NO_PARSER: failed to raise an exception") + except ValueError as e: + pass + except BaseException as e: + self.fail("CUSTOM_NO_PARSER: raised the wrong exception: %r" % e) if __name__ == '__main__': TestIntegratedTestKeywordParser.load_keyword_parser_lit_tests() diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in index 57924fab8d36a..08a2a670b21c8 100644 --- a/utils/llvm-lit/llvm-lit.in +++ b/utils/llvm-lit/llvm-lit.in @@ -39,6 +39,16 @@ if os.path.exists(compilerrt_obj_root): builtin_parameters['compilerrt_site_basedir'] = \ os.path.join(compilerrt_obj_root, 'test') +libcxx_obj_root = os.path.join(llvm_obj_root, 'projects', 'libcxx') +if os.path.exists(libcxx_obj_root): + builtin_parameters['libcxx_site_config'] = \ + os.path.join(libcxx_obj_root, 'test', 'lit.site.cfg') + +libcxxabi_obj_root = os.path.join(llvm_obj_root, 'projects', 'libcxxabi') +if os.path.exists(libcxxabi_obj_root): + builtin_parameters['libcxxabi_site_config'] = \ + os.path.join(libcxxabi_obj_root, 'test', 'lit.site.cfg') + if __name__=='__main__': from lit.main import main main(builtin_parameters) diff --git a/utils/opt-viewer/opt-diff.py b/utils/opt-viewer/opt-diff.py new file mode 100755 index 0000000000000..8c377860653e0 --- /dev/null +++ b/utils/opt-viewer/opt-diff.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python2.7 + +from __future__ import print_function + +desc = '''Generate the difference of two YAML files into a new YAML file (works on +pair of directories too). A new attribute 'Added' is set to True or False +depending whether the entry is added or removed from the first input to the +next. + +The tools requires PyYAML.''' + +import yaml +# Try to use the C parser. 
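# (Editor's sketch, not part of the patch.) Intended workflow, assuming two
# build trees compiled with -fsave-optimization-record:
#   ./opt-diff.py before/ after/ -o diff.opt.yaml
#   ./opt-viewer.py diff.opt.yaml html/
# Remarks present only in the second tree get Added=True, remarks that
# disappeared get Added=False, and opt-viewer renders that as a '+' or '-'
# prefix through Remark.PassWithDiffPrefix in optrecord.py.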
+try: + from yaml import CLoader as Loader +except ImportError: + from yaml import Loader + +import optrecord +import argparse +from collections import defaultdict +from multiprocessing import cpu_count, Pool +import os, os.path +import fnmatch + +def find_files(dir_or_file): + if os.path.isfile(dir_or_file): + return [dir_or_file] + + all = [] + for dir, subdirs, files in os.walk(dir_or_file): + for file in files: + if fnmatch.fnmatch(file, "*.opt.yaml"): + all.append( os.path.join(dir, file)) + return all + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('yaml_dir_or_file_1') + parser.add_argument('yaml_dir_or_file_2') + parser.add_argument( + '--jobs', + '-j', + default=cpu_count(), + type=int, + help='Max job count (defaults to current CPU count)') + parser.add_argument('--output', '-o', default='diff.opt.yaml') + args = parser.parse_args() + + if args.jobs == 1: + pmap = map + else: + pool = Pool(processes=args.jobs) + pmap = pool.map + + files1 = find_files(args.yaml_dir_or_file_1) + files2 = find_files(args.yaml_dir_or_file_2) + + all_remarks1, _, _ = optrecord.gather_results(pmap, files1) + all_remarks2, _, _ = optrecord.gather_results(pmap, files2) + + added = set(all_remarks2.values()) - set(all_remarks1.values()) + removed = set(all_remarks1.values()) - set(all_remarks2.values()) + + for r in added: + r.Added = True + for r in removed: + r.Added = False + stream = file(args.output, 'w') + yaml.dump_all(added | removed, stream) diff --git a/utils/opt-viewer/opt-stats.py b/utils/opt-viewer/opt-stats.py new file mode 100755 index 0000000000000..b22a052a737a1 --- /dev/null +++ b/utils/opt-viewer/opt-stats.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python2.7 + +from __future__ import print_function + +desc = '''Generate statistics about optimization records from the YAML files +generated with -fsave-optimization-record and -fdiagnostics-show-hotness. + +The tools requires PyYAML and Pygments Python packages.''' + +import optrecord +import argparse +import operator +from collections import defaultdict +from multiprocessing import cpu_count, Pool + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('yaml_files', nargs='+') + parser.add_argument( + '--jobs', + '-j', + default=cpu_count(), + type=int, + help='Max job count (defaults to current CPU count)') + args = parser.parse_args() + + if len(args.yaml_files) == 0: + parser.print_help() + sys.exit(1) + + if args.jobs == 1: + pmap = map + else: + pool = Pool(processes=args.jobs) + pmap = pool.map + + all_remarks, file_remarks, _ = optrecord.gather_results(pmap, args.yaml_files) + + bypass = defaultdict(int) + byname = defaultdict(int) + for r in all_remarks.itervalues(): + bypass[r.Pass] += 1 + byname[r.Pass + "/" + r.Name] += 1 + + total = len(all_remarks) + print("{:24s} {:10d}\n".format("Total number of remarks", total)) + + print("Top 10 remarks by pass:") + for (passname, count) in sorted(bypass.items(), key=operator.itemgetter(1), + reverse=True)[:10]: + print(" {:30s} {:2.0f}%". format(passname, count * 100. / total)) + + print("\nTop 10 remarks:") + for (name, count) in sorted(byname.items(), key=operator.itemgetter(1), + reverse=True)[:10]: + print(" {:30s} {:2.0f}%". format(name, count * 100. 
/ total)) diff --git a/utils/opt-viewer/opt-viewer.py b/utils/opt-viewer/opt-viewer.py index c936597475c5d..a14aee5f298df 100755 --- a/utils/opt-viewer/opt-viewer.py +++ b/utils/opt-viewer/opt-viewer.py @@ -5,167 +5,41 @@ from __future__ import print_function desc = '''Generate HTML output to visualize optimization records from the YAML files generated with -fsave-optimization-record and -fdiagnostics-show-hotness. -The tools requires PyYAML and Pygments Python packages. +The tools requires PyYAML and Pygments Python packages.''' -For faster parsing, you may want to use libYAML with PyYAML.''' - -import yaml -# Try to use the C parser. -try: - from yaml import CLoader as Loader -except ImportError: - from yaml import Loader +import optrecord +import functools +from multiprocessing import Pool +from multiprocessing import Lock, cpu_count +import errno import argparse import os.path import re -import subprocess import shutil from pygments import highlight from pygments.lexers.c_cpp import CppLexer from pygments.formatters import HtmlFormatter +import cgi -parser = argparse.ArgumentParser(description=desc) -parser.add_argument('yaml_files', nargs='+') -parser.add_argument('output_dir') -parser.add_argument('-source-dir', '-s', default='', help='set source directory') -args = parser.parse_args() - -p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - - -def demangle(name): - p.stdin.write(name + '\n') - return p.stdout.readline().rstrip() - - -class Remark(yaml.YAMLObject): - max_hotness = 0 - - # Work-around for http://pyyaml.org/ticket/154. - yaml_loader = Loader - - @classmethod - def should_display_hotness(cls): - # If max_hotness is 0 at the end, we assume hotness information is - # missing and no relative hotness information is displayed - return cls.max_hotness != 0 - - # Map function names to their source location for function where inlining happened - caller_loc = dict() - - def __getattr__(self, name): - # If hotness is missing, assume 0 - if name == 'Hotness': - return 0 - raise AttributeError - - @property - def File(self): - return self.DebugLoc['File'] - - @property - def Line(self): - return int(self.DebugLoc['Line']) - - @property - def Column(self): - return self.DebugLoc['Column'] - - @property - def DebugLocString(self): - return "{}:{}:{}".format(self.File, self.Line, self.Column) - - @property - def DemangledFunctionName(self): - return demangle(self.Function) - - @classmethod - def make_link(cls, File, Line): - return "{}#L{}".format(SourceFileRenderer.html_file_name(File), Line) - - @property - def Link(self): - return Remark.make_link(self.File, self.Line) - - def getArgString(self, mapping): - mapping = mapping.copy() - dl = mapping.get('DebugLoc') - if dl: - del mapping['DebugLoc'] - - assert(len(mapping) == 1) - (key, value) = mapping.items()[0] - - if key == 'Caller' or key == 'Callee': - value = demangle(value) - - if dl and key != 'Caller': - return "<a href={}>{}</a>".format( - Remark.make_link(dl['File'], dl['Line']), value) - else: - return value - - @property - def message(self): - # Args is a list of mappings (dictionaries) - values = [self.getArgString(mapping) for mapping in self.Args] - return "".join(values) - - @property - def RelativeHotness(self): - if Remark.should_display_hotness(): - return "{}%".format(int(round(self.Hotness * 100 / Remark.max_hotness))) - else: - return '' - - @property - def key(self): - return (self.__class__, self.Pass, self.Name, self.File, self.Line, self.Column, self.message) - - -class 
Analysis(Remark): - yaml_tag = '!Analysis' - - @property - def color(self): - return "white" - - -class AnalysisFPCommute(Analysis): - yaml_tag = '!AnalysisFPCommute' - - -class AnalysisAliasing(Analysis): - yaml_tag = '!AnalysisAliasing' - - -class Passed(Remark): - yaml_tag = '!Passed' - - @property - def color(self): - return "green" - - -class Missed(Remark): - yaml_tag = '!Missed' - - @property - def color(self): - return "red" +# This allows passing the global context to the child processes. +class Context: + def __init__(self, caller_loc = dict()): + # Map function names to their source location for function where inlining happened + self.caller_loc = caller_loc +context = Context() class SourceFileRenderer: - def __init__(self, filename): + def __init__(self, source_dir, output_dir, filename): existing_filename = None if os.path.exists(filename): existing_filename = filename else: - fn = os.path.join(args.source_dir, filename) + fn = os.path.join(source_dir, filename) if os.path.exists(fn): existing_filename = fn - self.stream = open(os.path.join(args.output_dir, SourceFileRenderer.html_file_name(filename)), 'w') + self.stream = open(os.path.join(output_dir, optrecord.html_file_name(filename)), 'w') if existing_filename: self.source_stream = open(existing_filename) else: @@ -176,35 +50,47 @@ class SourceFileRenderer: </html> '''.format(filename), file=self.stream) - self.html_formatter = HtmlFormatter() - self.cpp_lexer = CppLexer() + self.html_formatter = HtmlFormatter(encoding='utf-8') + self.cpp_lexer = CppLexer(stripnl=False) - def render_source_line(self, linenum, line): - html_line = highlight(line, self.cpp_lexer, self.html_formatter) - print(''' + def render_source_lines(self, stream, line_remarks): + file_text = stream.read() + html_highlighted = highlight(file_text, self.cpp_lexer, self.html_formatter) + + # Take off the header and footer, these must be + # reapplied line-wise, within the page structure + html_highlighted = html_highlighted.replace('<div class="highlight"><pre>', '') + html_highlighted = html_highlighted.replace('</pre></div>', '') + + for (linenum, html_line) in enumerate(html_highlighted.split('\n'), start=1): + print(''' <tr> <td><a name=\"L{linenum}\">{linenum}</a></td> <td></td> <td></td> -<td>{html_line}</td> +<td><div class="highlight"><pre>{html_line}</pre></div></td> </tr>'''.format(**locals()), file=self.stream) + for remark in line_remarks.get(linenum, []): + self.render_inline_remarks(remark, html_line) + def render_inline_remarks(self, r, line): inlining_context = r.DemangledFunctionName - dl = Remark.caller_loc.get(r.Function) + dl = context.caller_loc.get(r.Function) if dl: - link = Remark.make_link(dl['File'], dl['Line'] - 2) + link = optrecord.make_link(dl['File'], dl['Line'] - 2) inlining_context = "<a href={link}>{r.DemangledFunctionName}</a>".format(**locals()) # Column is the number of characters *including* tabs, keep those and # replace everything else with spaces. 
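# (Editor's note, not part of the patch.) The change below guards against
# remarks whose DebugLoc carries Column == 0: the old slice line[:0 - 1]
# silently dropped the last character of the line, while the new
# line[:max(0, 1) - 1] reduces to line[:0] and yields an empty indent.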
- indent = line[:r.Column - 1] + indent = line[:max(r.Column, 1) - 1] indent = re.sub('\S', ' ', indent) + print(''' <tr> <td></td> <td>{r.RelativeHotness}</td> -<td class=\"column-entry-{r.color}\">{r.Pass}</td> +<td class=\"column-entry-{r.color}\">{r.PassWithDiffPrefix}</td> <td><pre style="display:inline">{indent}</pre><span class=\"column-entry-yellow\"> {r.message} </span></td> <td class=\"column-entry-yellow\">{inlining_context}</td> </tr>'''.format(**locals()), file=self.stream) @@ -228,31 +114,26 @@ class SourceFileRenderer: <td>Source</td> <td>Inline Context</td> </tr>''', file=self.stream) - for (linenum, line) in enumerate(self.source_stream.readlines(), start=1): - self.render_source_line(linenum, line) - for remark in line_remarks.get(linenum, []): - self.render_inline_remarks(remark, line) + self.render_source_lines(self.source_stream, line_remarks) + print(''' </table> </body> </html>''', file=self.stream) - @classmethod - def html_file_name(cls, filename): - return filename.replace('/', '_') + ".html" - class IndexRenderer: - def __init__(self): - self.stream = open(os.path.join(args.output_dir, 'index.html'), 'w') + def __init__(self, output_dir): + self.stream = open(os.path.join(output_dir, 'index.html'), 'w') - def render_entry(self, r): + def render_entry(self, r, odd): + escaped_name = cgi.escape(r.DemangledFunctionName) print(''' <tr> -<td><a href={r.Link}>{r.DebugLocString}</a></td> -<td>{r.RelativeHotness}</td> -<td>{r.DemangledFunctionName}</td> -<td class=\"column-entry-{r.color}\">{r.Pass}</td> +<td class=\"column-entry-{odd}\"><a href={r.Link}>{r.DebugLocString}</a></td> +<td class=\"column-entry-{odd}\">{r.RelativeHotness}</td> +<td class=\"column-entry-{odd}\">{escaped_name}</td> +<td class=\"column-entry-{r.color}\">{r.PassWithDiffPrefix}</td> </tr>'''.format(**locals()), file=self.stream) def render(self, all_remarks): @@ -270,49 +151,83 @@ class IndexRenderer: <td>Function</td> <td>Pass</td> </tr>''', file=self.stream) - for remark in all_remarks: - self.render_entry(remark) + for i, remark in enumerate(all_remarks): + self.render_entry(remark, i % 2) print(''' </table> </body> </html>''', file=self.stream) -all_remarks = dict() -file_remarks = dict() +def _render_file(source_dir, output_dir, ctx, entry): + global context + context = ctx + filename, remarks = entry + SourceFileRenderer(source_dir, output_dir, filename).render(remarks) -for input_file in args.yaml_files: - f = open(input_file) - docs = yaml.load_all(f, Loader=Loader) - for remark in docs: - # Avoid remarks withoug debug location or if they are duplicated - if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks: - continue - all_remarks[remark.key] = remark - file_remarks.setdefault(remark.File, dict()).setdefault(remark.Line, []).append(remark) +def map_remarks(all_remarks): + # Set up a map between function names and their source location for + # function where inlining happened + for remark in all_remarks.itervalues(): + if isinstance(remark, optrecord.Passed) and remark.Pass == "inline" and remark.Name == "Inlined": + for arg in remark.Args: + caller = arg.get('Caller') + if caller: + context.caller_loc[caller] = arg['DebugLoc'] - Remark.max_hotness = max(Remark.max_hotness, remark.Hotness) -# Set up a map between function names and their source location for function where inlining happened -for remark in all_remarks.itervalues(): - if type(remark) == Passed and remark.Pass == "inline" and remark.Name == "Inlined": - for arg in remark.Args: - caller = arg.get('Caller') - if 
caller: - Remark.caller_loc[caller] = arg['DebugLoc'] - -if Remark.should_display_hotness(): - sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: r.Hotness, reverse=True) -else: - sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.File, r.Line, r.Column)) - -if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - -for (filename, remarks) in file_remarks.iteritems(): - SourceFileRenderer(filename).render(remarks) - -IndexRenderer().render(sorted_remarks) - -shutil.copy(os.path.join(os.path.dirname(os.path.realpath(__file__)), "style.css"), args.output_dir) +def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, should_display_hotness): + try: + os.makedirs(output_dir) + except OSError as e: + if e.errno == errno.EEXIST and os.path.isdir(output_dir): + pass + else: + raise + + _render_file_bound = functools.partial(_render_file, source_dir, output_dir, context) + pmap(_render_file_bound, file_remarks.items()) + + if should_display_hotness: + sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.Hotness, r.File, r.Line, r.Column, r.__dict__), reverse=True) + else: + sorted_remarks = sorted(all_remarks.itervalues(), key=lambda r: (r.File, r.Line, r.Column, r.__dict__)) + IndexRenderer(args.output_dir).render(sorted_remarks) + + shutil.copy(os.path.join(os.path.dirname(os.path.realpath(__file__)), + "style.css"), output_dir) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('yaml_files', nargs='+') + parser.add_argument('output_dir') + parser.add_argument( + '--jobs', + '-j', + default=cpu_count(), + type=int, + help='Max job count (defaults to current CPU count)') + parser.add_argument( + '-source-dir', + '-s', + default='', + help='set source directory') + args = parser.parse_args() + + if len(args.yaml_files) == 0: + parser.print_help() + sys.exit(1) + + if args.jobs == 1: + pmap = map + else: + pool = Pool(processes=args.jobs) + pmap = pool.map + + all_remarks, file_remarks, should_display_hotness = optrecord.gather_results(pmap, args.yaml_files) + + map_remarks(all_remarks) + + generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir, should_display_hotness) diff --git a/utils/opt-viewer/optrecord.py b/utils/opt-viewer/optrecord.py new file mode 100644 index 0000000000000..3dc77e9db0199 --- /dev/null +++ b/utils/opt-viewer/optrecord.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python2.7 + +from __future__ import print_function + +import yaml +# Try to use the C parser. +try: + from yaml import CLoader as Loader +except ImportError: + print("For faster parsing, you may want to install libYAML for PyYAML") + from yaml import Loader + +import functools +from collections import defaultdict +import itertools +from multiprocessing import Pool +from multiprocessing import Lock, cpu_count +import cgi +import subprocess + +import traceback + +p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) +p_lock = Lock() + + +def demangle(name): + with p_lock: + p.stdin.write(name + '\n') + return p.stdout.readline().rstrip() + +def html_file_name(filename): + return filename.replace('/', '_') + ".html" + +def make_link(File, Line): + return "{}#L{}".format(html_file_name(File), Line) + + +class Remark(yaml.YAMLObject): + # Work-around for http://pyyaml.org/ticket/154. 
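# (Editor's sketch, not part of the patch; field values are illustrative.)
# A record consumed by these classes, as emitted with
# -fsave-optimization-record, looks roughly like:
#   --- !Missed
#   Pass:     inline
#   Name:     NoDefinition
#   DebugLoc: { File: foo.c, Line: 12, Column: 5 }
#   Function: main
#   Args:
#     - Callee: bar
#     - String: ' will not be inlined into '
#     - Caller: main
# The '!Missed' document tag selects the Missed subclass below via yaml_tag.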
+ yaml_loader = Loader + + def initmissing(self): + if not hasattr(self, 'Hotness'): + self.Hotness = 0 + if not hasattr(self, 'Args'): + self.Args = [] + + @property + def File(self): + return self.DebugLoc['File'] + + @property + def Line(self): + return int(self.DebugLoc['Line']) + + @property + def Column(self): + return self.DebugLoc['Column'] + + @property + def DebugLocString(self): + return "{}:{}:{}".format(self.File, self.Line, self.Column) + + @property + def DemangledFunctionName(self): + return demangle(self.Function) + + @property + def Link(self): + return make_link(self.File, self.Line) + + def getArgString(self, mapping): + mapping = mapping.copy() + dl = mapping.get('DebugLoc') + if dl: + del mapping['DebugLoc'] + + assert(len(mapping) == 1) + (key, value) = mapping.items()[0] + + if key == 'Caller' or key == 'Callee': + value = cgi.escape(demangle(value)) + + if dl and key != 'Caller': + return "<a href={}>{}</a>".format( + make_link(dl['File'], dl['Line']), value) + else: + return value + + def getDiffPrefix(self): + if hasattr(self, 'Added'): + if self.Added: + return '+' + else: + return '-' + return '' + + @property + def PassWithDiffPrefix(self): + return self.getDiffPrefix() + self.Pass + + @property + def message(self): + # Args is a list of mappings (dictionaries) + values = [self.getArgString(mapping) for mapping in self.Args] + return "".join(values) + + @property + def RelativeHotness(self): + if self.max_hotness: + return "{}%".format(int(round(self.Hotness * 100 / self.max_hotness))) + else: + return '' + + @property + def key(self): + k = (self.__class__, self.PassWithDiffPrefix, self.Name, self.File, self.Line, self.Column, self.Function) + for arg in self.Args: + for (key, value) in arg.iteritems(): + if type(value) is dict: + value = tuple(value.items()) + k += (key, value) + return k + + def __hash__(self): + return hash(self.key) + + def __eq__(self, other): + return self.key == other.key + + def __repr__(self): + return str(self.key) + + +class Analysis(Remark): + yaml_tag = '!Analysis' + + @property + def color(self): + return "white" + + +class AnalysisFPCommute(Analysis): + yaml_tag = '!AnalysisFPCommute' + + +class AnalysisAliasing(Analysis): + yaml_tag = '!AnalysisAliasing' + + +class Passed(Remark): + yaml_tag = '!Passed' + + @property + def color(self): + return "green" + + +class Missed(Remark): + yaml_tag = '!Missed' + + @property + def color(self): + return "red" + + +def get_remarks(input_file): + max_hotness = 0 + all_remarks = dict() + file_remarks = defaultdict(functools.partial(defaultdict, list)) + + with open(input_file) as f: + docs = yaml.load_all(f, Loader=Loader) + for remark in docs: + remark.initmissing() + # Avoid remarks withoug debug location or if they are duplicated + if not hasattr(remark, 'DebugLoc') or remark.key in all_remarks: + continue + all_remarks[remark.key] = remark + + file_remarks[remark.File][remark.Line].append(remark) + + # If we're reading a back a diff yaml file, max_hotness is already + # captured which may actually be less than the max hotness found + # in the file. 
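# (Editor's note, not part of the patch.) gather_results() later copies the
# global maximum onto every remark, so RelativeHotness becomes, for example,
# int(round(300 * 100 / 600)) -> "50%" for a remark with Hotness 300 when the
# hottest remark across all inputs is 600; with no hotness data it stays "".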
+ if hasattr(remark, 'max_hotness'): + max_hotness = remark.max_hotness + max_hotness = max(max_hotness, remark.Hotness) + + return max_hotness, all_remarks, file_remarks + + +def gather_results(pmap, filenames): + remarks = pmap(get_remarks, filenames) + max_hotness = max(entry[0] for entry in remarks) + + def merge_file_remarks(file_remarks_job, all_remarks, merged): + for filename, d in file_remarks_job.iteritems(): + for line, remarks in d.iteritems(): + for remark in remarks: + # Bring max_hotness into the remarks so that + # RelativeHotness does not depend on an external global. + remark.max_hotness = max_hotness + if remark.key not in all_remarks: + merged[filename][line].append(remark) + + all_remarks = dict() + file_remarks = defaultdict(functools.partial(defaultdict, list)) + for _, all_remarks_job, file_remarks_job in remarks: + merge_file_remarks(file_remarks_job, all_remarks, file_remarks) + all_remarks.update(all_remarks_job) + + return all_remarks, file_remarks, max_hotness != 0 diff --git a/utils/opt-viewer/style.css b/utils/opt-viewer/style.css index 2ef244a157188..595c3e46847dd 100644 --- a/utils/opt-viewer/style.css +++ b/utils/opt-viewer/style.css @@ -62,6 +62,12 @@ table { text-align: left; background-color: #ffe1a6; } +.column-entry-0 { + background-color: #ffffff; +} +.column-entry-1 { + background-color: #eeeeee; +} .line-number { text-align: right; color: #aaa; diff --git a/utils/prepare-code-coverage-artifact.py b/utils/prepare-code-coverage-artifact.py index 726375e899cd7..883cdd78049be 100644 --- a/utils/prepare-code-coverage-artifact.py +++ b/utils/prepare-code-coverage-artifact.py @@ -6,6 +6,9 @@ from __future__ import print_function - Collate raw profiles into one indexed profile. - Generate html reports for the given binaries. + +Caution: The positional arguments to this script must be specified before any +optional arguments, such as --restrict. ''' import argparse @@ -84,7 +87,8 @@ if __name__ == '__main__': help='Emit a unified report for all binaries') parser.add_argument('--restrict', metavar='R', type=str, nargs='*', default=[], - help='Restrict the reporting to the given source paths') + help='Restrict the reporting to the given source paths' + ' (must be specified after all other positional arguments)') args = parser.parse_args() if args.use_existing_profdata and args.only_merge: diff --git a/utils/release/build_llvm_package.bat b/utils/release/build_llvm_package.bat index bb6853f57c6fd..eca74347cf3e7 100755 --- a/utils/release/build_llvm_package.bat +++ b/utils/release/build_llvm_package.bat @@ -8,9 +8,9 @@ REM Usage: build_llvm_package.bat <revision> REM Prerequisites:
REM
-REM Visual Studio 2015, CMake, Ninja, SVN, GNUWin32, SWIG, Python 3,
+REM Visual Studio 2017, CMake, Ninja, SVN, GNUWin32, SWIG, Python 3,
REM NSIS with the strlen_8192 patch,
-REM Visual Studio 2015 SDK (for the clang-format plugin).
+REM Visual Studio 2017 SDK (for the clang-format plugin).
REM
REM
REM For LLDB, SWIG version <= 3.0.8 needs to be used to work around
@@ -18,15 +18,16 @@ REM https://github.com/swig/swig/issues/769
REM You need to modify the paths below:
-set vcdir=c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC
+set vsdevcmd=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\Common7\Tools\VsDevCmd.bat
+
set python32_dir=C:\Users\hwennborg\AppData\Local\Programs\Python\Python35-32
set python64_dir=C:\Users\hwennborg\AppData\Local\Programs\Python\Python35
set PATH=%PATH%;c:\gnuwin32\bin
set revision=%1
set branch=trunk
-set package_version=4.0.0-r%revision%
-set clang_format_vs_version=4.0.0.%revision%
+set package_version=5.0.0-r%revision%
+set clang_format_vs_version=5.0.0.%revision%
set build_dir=llvm_package_%revision%
echo Branch: %branch%
@@ -47,14 +48,15 @@ svn.exe export -r %revision% http://llvm.org/svn/llvm-project/clang-tools-extra/
svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lld/%branch% llvm/tools/lld || exit /b
svn.exe export -r %revision% http://llvm.org/svn/llvm-project/compiler-rt/%branch% llvm/projects/compiler-rt || exit /b
svn.exe export -r %revision% http://llvm.org/svn/llvm-project/openmp/%branch% llvm/projects/openmp || exit /b
+svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm/tools/lldb || exit /b
REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
-set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DLLVM_USE_CRT_RELEASE=MT -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
+set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCMAKE_INSTALL_UCRT_LIBRARIES=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
REM TODO: Run all tests, including lld and compiler-rt.
-call "%vcdir%/vcvarsall.bat" x86
+call "%vsdevcmd%" -arch=x86
set CC=
set CXX=
mkdir build32_stage0
@@ -69,16 +71,25 @@ mkdir build32
cd build32
set CC=..\build32_stage0\bin\clang-cl
set CXX=..\build32_stage0\bin\clang-cl
-cmake -GNinja %cmake_flags% -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
+cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
ninja all || exit /b
ninja check || ninja check || ninja check || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
-copy ..\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix
ninja package || exit /b
cd ..
+REM The plug-in is built separately as it uses a statically linked clang-format.exe.
+mkdir build_vsix
+cd build_vsix
+set CC=..\build32_stage0\bin\clang-cl
+set CXX=..\build32_stage0\bin\clang-cl
+cmake -GNinja %cmake_flags% -DLLVM_USE_CRT_RELEASE=MT -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
+ninja clang_format_vsix || exit /b
+copy ..\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix
+cd ..
+
-call "%vcdir%/vcvarsall.bat" amd64
+call "%vsdevcmd%" -arch=amd64
set CC=
set CXX=
mkdir build64_stage0
diff --git a/utils/release/merge-request.sh b/utils/release/merge-request.sh new file mode 100755 index 0000000000000..3345d2ad85c53 --- /dev/null +++ b/utils/release/merge-request.sh @@ -0,0 +1,198 @@ +# !/bin/bash +#===-- merge-request.sh ---------------------------------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. +# +#===------------------------------------------------------------------------===# +# +# Submit a merge request to bugzilla. +# +#===------------------------------------------------------------------------===# + +dryrun="" +stable_version="" +revision="" +BUGZILLA_BIN="" +BUGZILLA_CMD="" +release_metabug="" +bugzilla_product="new-bugs" +bugzilla_component="new bugs" +bugzilla_assigned_to="" +bugzilla_user="" +bugzilla_version="" +bugzilla_url="http://bugs.llvm.org/xmlrpc.cgi" + +function usage() { + echo "usage: `basename $0` -user EMAIL -stable-version X.Y -r NUM" + echo "" + echo " -user EMAIL Your email address for logging into bugzilla." + echo " -stable-version X.Y The stable release version (e.g. 4.0, 5.0)." + echo " -r NUM Revision number to merge (e.g. 1234567)." + echo " -bugzilla-bin PATH Path to bugzilla binary (optional)." + echo " -assign-to EMAIL Assign bug to user with EMAIL (optional)." + echo " -dry-run Print commands instead of executing them." +} + +while [ $# -gt 0 ]; do + case $1 in + -user) + shift + bugzilla_user="$1" + ;; + -stable-version) + shift + stable_version="$1" + ;; + -r) + shift + revision="$1" + ;; + -project) + shift + project="$1" + ;; + -component) + shift + bugzilla_component="$1" + ;; + -bugzilla-bin) + shift + BUGZILLA_BIN="$1" + ;; + -assign-to) + shift + bugzilla_assigned_to="--assigned_to=$1" + ;; + -dry-run) + dryrun="echo" + ;; + -help | --help | -h | --h | -\? ) + usage + exit 0 + ;; + * ) + echo "unknown option: $1" + usage + exit 1 + ;; + esac + shift +done + +if [ -z "$stable_version" ]; then + echo "error: no stable version specified" + exit 1 +fi + +case $stable_version in + 4.0) + release_metabug="32061" + ;; + *) + echo "error: invalid stable version" + exit 1 +esac +bugzilla_version=$stable_version + +if [ -z "$revision" ]; then + echo "error: revision not specified" + exit 1 +fi + +if [ -z "$bugzilla_user" ]; then + echo "error: bugzilla username not specified." + exit 1 +fi + +if [ -z "$BUGZILLA_BIN" ]; then + BUGZILLA_BIN=`which bugzilla` + if [ $? -ne 0 ]; then + echo "error: could not find bugzilla executable." + echo "Make sure the bugzilla cli tool is installed on your system: " + echo "pip install python-bugzilla (recommended)" + echo "" + echo "Fedora: dnf install python-bugzilla" + echo "Ubuntu/Debian: apt-get install bugzilla-cli" + exit 1 + fi +fi + +BUGZILLA_MAJOR_VERSION=`$BUGZILLA_BIN --version 2>&1 | cut -d . -f 1` + +if [ $BUGZILLA_MAJOR_VERSION -eq 1 ]; then + + echo "***************************** Warning *******************************" + echo "You are using an older version of the bugzilla cli tool. 
You will be " + echo "able to create bugs, but this script will crash with the following " + echo "error when trying to read back information about the bug you created:" + echo "" + echo "KeyError: 'internals'" + echo "" + echo "To avoid this error, use version 2.0.0 or higher" + echo "https://pypi.python.org/pypi/python-bugzilla" + echo "*********************************************************************" +fi + +BUGZILLA_CMD="$BUGZILLA_BIN --bugzilla=$bugzilla_url" + +bug_url="https://reviews.llvm.org/rL$revision" + +echo "Checking for duplicate bugs..." + +check_duplicates=`$BUGZILLA_CMD query --url $bug_url` + +if [ -n "$check_duplicates" ]; then + echo "Duplicate bug found:" + echo $check_duplicates + exit 1 +fi + +echo "Done" + +# Get short commit summary +commit_summary='' +commit_msg=`svn log -r $revision https://llvm.org/svn/llvm-project/` +if [ $? -ne 0 ]; then + echo "warning: failed to get commit message." + commit_msg="" +fi + +if [ -n "$commit_msg" ]; then + commit_summary=`echo "$commit_msg" | sed '4q;d' | cut -c1-80` + commit_summary=" : ${commit_summary}" +fi + +bug_summary="Merge r$revision into the $stable_version branch${commit_summary}" + +if [ -z "$dryrun" ]; then + set -x +fi + +${dryrun} $BUGZILLA_CMD --login --user=$bugzilla_user new \ + -p "$bugzilla_product" \ + -c "$bugzilla_component" -u $bug_url --blocked=$release_metabug \ + -o All --priority=P --arch All -v $bugzilla_version \ + --summary "${bug_summary}" \ + -l "Is this patch OK to merge to the $stable_version branch?" \ + $bugzilla_assigned_to \ + --oneline + +set +x + +if [ -n "$dryrun" ]; then + exit 0 +fi + +if [ $BUGZILLA_MAJOR_VERSION -eq 1 ]; then + success=`$BUGZILLA_CMD query --url $bug_url` + if [ -z "$success" ]; then + echo "Failed to create bug." + exit 1 + fi + + echo " Created new bug:" + echo $success +fi diff --git a/utils/unittest/CMakeLists.txt b/utils/unittest/CMakeLists.txt index a50733af9aae1..b42ac834e3a77 100644 --- a/utils/unittest/CMakeLists.txt +++ b/utils/unittest/CMakeLists.txt @@ -40,8 +40,8 @@ if (NOT LLVM_ENABLE_THREADS) add_definitions( -DGTEST_HAS_PTHREAD=0 ) endif() -find_library(PTHREAD_LIBRARY_PATH pthread) -if (PTHREAD_LIBRARY_PATH) +find_library(LLVM_PTHREAD_LIBRARY_PATH pthread) +if (LLVM_PTHREAD_LIBRARY_PATH) list(APPEND LIBS pthread) endif() diff --git a/utils/update_llc_test_checks.py b/utils/update_llc_test_checks.py index d077c8a8eb245..3b3ff74d8633d 100755 --- a/utils/update_llc_test_checks.py +++ b/utils/update_llc_test_checks.py @@ -29,6 +29,8 @@ def llc(args, cmd_args, ir): SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') +SCRUB_LOOP_COMMENT_RE = re.compile( + r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) ASM_FUNCTION_X86_RE = re.compile( r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' 
@@ -54,7 +56,7 @@ RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
-CHECK_PREFIX_RE = re.compile('--check-prefix=(\S+)')
+CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
ASM_FUNCTION_PPC_RE = re.compile(
@@ -68,6 +70,13 @@ ASM_FUNCTION_PPC_RE = re.compile(
    r'.Lfunc_end[0-9]+:\n',
    flags=(re.M | re.S))
+ASM_FUNCTION_SYSTEMZ_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'[ \t]+.cfi_startproc\n'
+    r'(?P<body>.*?)\n'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
def scrub_asm_x86(asm):
  # Scrub runs of whitespace out of the assembly, but leave the leading
@@ -107,6 +116,18 @@ def scrub_asm_powerpc64le(asm):
  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
  # Expand the tabs used for indentation.
  asm = string.expandtabs(asm, 2)
+  # Strip unimportant comments
+  asm = SCRUB_LOOP_COMMENT_RE.sub(r'', asm)
+  # Strip trailing whitespace.
+  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
+def scrub_asm_systemz(asm):
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation.
+  asm = string.expandtabs(asm, 2)
  # Strip trailing whitespace.
  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
  return asm
@@ -121,7 +142,11 @@ def build_function_body_dictionary(raw_tool_output, triple, prefixes, func_dict,
      'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
      'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
      'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'thumbv8-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
+      'armeb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
      'powerpc64le': (scrub_asm_powerpc64le, ASM_FUNCTION_PPC_RE),
+      's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE),
  }
  handlers = None
  for prefix, s in target_handlers.items():
@@ -260,8 +285,8 @@ def main():
    llc_cmd_args = llc_cmd[len('llc'):].strip()
    llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip()
-    check_prefixes = [m.group(1)
-                      for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
+    check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+                      for item in m.group(1).split(',')]
    if not check_prefixes:
      check_prefixes = ['CHECK']
diff --git a/utils/update_test_checks.py b/utils/update_test_checks.py
index 92ab5ef6599cc..cace9023106f1 100755
--- a/utils/update_test_checks.py
+++ b/utils/update_test_checks.py
@@ -66,11 +66,11 @@ OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))
-CHECK_PREFIX_RE = re.compile('--check-prefix=(\S+)')
+CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Match things that look at identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
-IR_VALUE_RE = re.compile(r'(\s+)%(.+?)([,\s\(\)]|\Z)')
+IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
# Invoke the tool that is being tested.
@@ -324,8 +324,8 @@ def main():
    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
-    check_prefixes = [m.group(1)
-                      for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
+    check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+                      for item in m.group(1).split(',')]
    if not check_prefixes:
      check_prefixes = ['CHECK']
diff --git a/utils/vim/syntax/llvm.vim b/utils/vim/syntax/llvm.vim
index 5a2757c1159ef..22d688b14864c 100644
--- a/utils/vim/syntax/llvm.vim
+++ b/utils/vim/syntax/llvm.vim
@@ -1,7 +1,7 @@
" Vim syntax file
" Language: llvm
" Maintainer: The LLVM team, http://llvm.org/
-" Version: $Revision: 285927 $
+" Version: $Revision: 294808 $
if version < 600
  syntax clear
@@ -123,6 +123,7 @@ syn keyword llvmKeyword
  \ readnone
  \ readonly
  \ release
+  \ returned
  \ returns_twice
  \ sanitize_address
  \ sanitize_memory
diff --git a/utils/vim/vimrc b/utils/vim/vimrc
index c35eb0ecbbeb4..37983e4779fee 100644
--- a/utils/vim/vimrc
+++ b/utils/vim/vimrc
@@ -1,5 +1,5 @@
" LLVM coding guidelines conformance for VIM
-" $Revision: 176235 $
+" $Revision: 293693 $
"
" Maintainer: The LLVM Team, http://llvm.org
" WARNING: Read before you source in all these commands and macros! Some
@@ -74,13 +74,13 @@ command! DeleteTrailingWs :%s/\s\+$//
command! Untab :%s/\t/ /g
" Enable syntax highlighting for LLVM files. To use, copy
-" utils/vim/llvm.vim to ~/.vim/syntax .
+" utils/vim/syntax/llvm.vim to ~/.vim/syntax .
augroup filetype
  au! BufRead,BufNewFile *.ll set filetype=llvm
augroup END
" Enable syntax highlighting for tablegen files. To use, copy
-" utils/vim/tablegen.vim to ~/.vim/syntax .
+" utils/vim/syntax/tablegen.vim to ~/.vim/syntax .
augroup filetype
  au! BufRead,BufNewFile *.td set filetype=tablegen
augroup END
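
The --check-prefix change applied above to both update_llc_test_checks.py and update_test_checks.py accepts single- or double-dash spellings and splits comma-separated --check-prefixes lists into individual prefixes. A minimal standalone sketch of that parsing, using an illustrative RUN line rather than one taken from a real test:

    import re

    # Regex as committed above: matches -check-prefix=, --check-prefix= and --check-prefixes=.
    CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')

    # Illustrative FileCheck invocation (not from an actual test file).
    filecheck_cmd = 'FileCheck %s --check-prefixes=CHECK,SSE2 -check-prefix=AVX'

    # Flatten every comma-separated group into one prefix list, as main() now does.
    check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for item in m.group(1).split(',')]
    if not check_prefixes:
        check_prefixes = ['CHECK']

    print(check_prefixes)  # ['CHECK', 'SSE2', 'AVX']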