vendor/llvm-project/llvmorg-10.0.0-rc2-0-g90c78073f73

author: Dimitry Andric <dim@FreeBSD.org> 2020-02-14 21:24:03 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2020-02-14 21:24:03 +0000
commit: d75c7debad4509ece98792074e64b8a650a27bdb (patch)
tree: f8d77975739b43bf7ffef0612579168cb9ec9474
parent: 9c2f6c4bb805c7ac08c8925c96e429fcc322725e (diff)
158 files changed, 2870 insertions, 1377 deletions
diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h
index 30c4706d2a15..3ebaad4eafdd 100644
--- a/clang/include/clang/AST/ASTConcept.h
+++ b/clang/include/clang/AST/ASTConcept.h
@@ -29,14 +29,14 @@ class ConceptSpecializationExpr;
 class ConstraintSatisfaction : public llvm::FoldingSetNode {
   // The template-like entity that 'owns' the constraint checked here (can be a
   // constrained entity or a concept).
-  NamedDecl *ConstraintOwner = nullptr;
+  const NamedDecl *ConstraintOwner = nullptr;
   llvm::SmallVector<TemplateArgument, 4> TemplateArgs;
 
 public:
 
   ConstraintSatisfaction() = default;
 
-  ConstraintSatisfaction(NamedDecl *ConstraintOwner,
+  ConstraintSatisfaction(const NamedDecl *ConstraintOwner,
                          ArrayRef<TemplateArgument> TemplateArgs) :
       ConstraintOwner(ConstraintOwner), TemplateArgs(TemplateArgs.begin(),
                                                      TemplateArgs.end()) { }
@@ -57,7 +57,7 @@ public:
   }
 
   static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &C,
-                      NamedDecl *ConstraintOwner,
+                      const NamedDecl *ConstraintOwner,
                       ArrayRef<TemplateArgument> TemplateArgs);
 };
 
diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h
index 2a64326e8604..271d487e2fc9 100644
--- a/clang/include/clang/AST/ExprConcepts.h
+++ b/clang/include/clang/AST/ExprConcepts.h
@@ -63,6 +63,12 @@ protected:
                             ArrayRef<TemplateArgument> ConvertedArgs,
                             const ConstraintSatisfaction *Satisfaction);
 
+  ConceptSpecializationExpr(const ASTContext &C, ConceptDecl *NamedConcept,
+                            ArrayRef<TemplateArgument> ConvertedArgs,
+                            const ConstraintSatisfaction *Satisfaction,
+                            bool Dependent,
+                            bool ContainsUnexpandedParameterPack);
+
   ConceptSpecializationExpr(EmptyShell Empty, unsigned NumTemplateArgs);
 
 public:
@@ -76,6 +82,13 @@ public:
          const ConstraintSatisfaction *Satisfaction);
 
   static ConceptSpecializationExpr *
+  Create(const ASTContext &C, ConceptDecl *NamedConcept,
+         ArrayRef<TemplateArgument> ConvertedArgs,
+         const ConstraintSatisfaction *Satisfaction,
+         bool Dependent,
+         bool ContainsUnexpandedParameterPack);
+
+  static ConceptSpecializationExpr *
   Create(ASTContext &C, EmptyShell Empty, unsigned NumTemplateArgs);
 
   ArrayRef<TemplateArgument> getTemplateArguments() const {
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index ef5d24dcf888..da572957d10d 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -11,6 +11,7 @@
 
 namespace llvm {
 class StringRef;
+class Twine;
 class VersionTuple;
 } // namespace llvm
 
@@ -30,7 +31,7 @@ enum class CudaVersion {
 };
 const char *CudaVersionToString(CudaVersion V);
 // Input is "Major.Minor"
-CudaVersion CudaStringToVersion(llvm::StringRef S);
+CudaVersion CudaStringToVersion(const llvm::Twine &S);
 
 enum class CudaArch {
   UNKNOWN,
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2da41bef2669..ecd871e36ee8 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -60,6 +60,9 @@ def err_drv_cuda_version_unsupported : Error<
   "but installation at %3 is %4.  Use --cuda-path to specify a different CUDA "
   "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
   "--no-cuda-version-check.">;
+def warn_drv_unknown_cuda_version: Warning<
+  "Unknown CUDA version %0. Assuming the latest supported version %1">,
+  InGroup<CudaUnknownVersion>;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
 def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
 def err_drv_invalid_thread_model_for_target : Error<
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index a15fb908c537..5ad07915d2f5 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -384,7 +384,10 @@ def GNULabelsAsValue : DiagGroup<"gnu-label-as-value">;
 def LiteralRange : DiagGroup<"literal-range">;
 def LocalTypeTemplateArgs : DiagGroup<"local-type-template-args",
                                       [CXX98CompatLocalTypeTemplateArgs]>;
-def RangeLoopAnalysis : DiagGroup<"range-loop-analysis">;
+def RangeLoopConstruct : DiagGroup<"range-loop-construct">;
+def RangeLoopBindReference : DiagGroup<"range-loop-bind-reference">;
+def RangeLoopAnalysis : DiagGroup<"range-loop-analysis",
+                                  [RangeLoopConstruct, RangeLoopBindReference]>;
 def ForLoopAnalysis : DiagGroup<"for-loop-analysis">;
 def LoopAnalysis : DiagGroup<"loop-analysis", [ForLoopAnalysis,
                                                RangeLoopAnalysis]>;
@@ -858,14 +861,15 @@ def Most : DiagGroup<"most", [
     Comment,
     DeleteNonVirtualDtor,
     Format,
+    ForLoopAnalysis,
     Implicit,
     InfiniteRecursion,
     IntInBoolContext,
-    LoopAnalysis,
     MismatchedTags,
     MissingBraces,
     Move,
     MultiChar,
+    RangeLoopConstruct,
     Reorder,
     ReturnType,
     SelfAssignment,
@@ -1113,6 +1117,9 @@ def SerializedDiagnostics : DiagGroup<"serialized-diagnostics">;
 // compiling CUDA C/C++ but which is not compatible with the CUDA spec.
 def CudaCompat : DiagGroup<"cuda-compat">;
 
+// Warning about unknown CUDA SDK version.
+def CudaUnknownVersion: DiagGroup<"unknown-cuda-version">;
+
 // A warning group for warnings about features supported by HIP but
 // ignored by CUDA.
 def HIPOnly : DiagGroup<"hip-only">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7636d04a34c3..2199dfbddc84 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2378,17 +2378,17 @@ def warn_for_range_const_reference_copy : Warning<
   "loop variable %0 "
   "%diff{has type $ but is initialized with type $"
   "| is initialized with a value of a different type}1,2 resulting in a copy">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopConstruct>, DefaultIgnore;
 def note_use_type_or_non_reference : Note<
   "use non-reference type %0 to keep the copy or type %1 to prevent copying">;
 def warn_for_range_variable_always_copy : Warning<
   "loop variable %0 is always a copy because the range of type %1 does not "
   "return a reference">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopBindReference>, DefaultIgnore;
 def note_use_non_reference_type : Note<"use non-reference type %0">;
 def warn_for_range_copy : Warning<
   "loop variable %0 of type %1 creates a copy from type %2">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopConstruct>, DefaultIgnore;
 def note_use_reference_type : Note<"use reference type %0 to prevent copying">;
 def err_objc_for_range_init_stmt : Error<
   "initialization statement is not supported when iterating over Objective-C "
@@ -4683,6 +4683,8 @@ def note_checking_constraints_for_var_spec_id_here : Note<
 def note_checking_constraints_for_class_spec_id_here : Note<
   "while checking constraint satisfaction for class template partial "
   "specialization '%0' required here">;
+def note_checking_constraints_for_function_here : Note<
+  "while checking constraint satisfaction for function '%0' required here">;
 def note_constraint_substitution_here : Note<
   "while substituting template arguments into constraint expression here">;
 def note_constraint_normalization_here : Note<
@@ -6746,6 +6748,10 @@ def err_bad_cxx_cast_scalar_to_vector_different_size : Error<
 def err_bad_cxx_cast_vector_to_vector_different_size : Error<
   "%select{||reinterpret_cast||C-style cast|}0 from vector %1 "
   "to vector %2 of different size">;
+def warn_bad_cxx_cast_nested_pointer_addr_space : Warning<
+  "%select{reinterpret_cast|C-style cast}0 from %1 to %2 "
+  "changes address space of nested pointers">,
+  InGroup<IncompatiblePointerTypesDiscardsQualifiers>;
 def err_bad_lvalue_to_rvalue_cast : Error<
   "cannot cast from lvalue of type %1 to rvalue reference type %2; types are "
   "not compatible">;
@@ -8390,6 +8396,12 @@ def note_defaulted_comparison_cannot_deduce : Note<
   "return type of defaulted 'operator<=>' cannot be deduced because "
   "return type %2 of three-way comparison for %select{|member|base class}0 %1 "
   "is not a standard comparison category type">;
+def err_defaulted_comparison_cannot_deduce_undeduced_auto : Error<
+  "return type of defaulted 'operator<=>' cannot be deduced because "
+  "three-way comparison for %select{|member|base class}0 %1 "
+  "has a deduced return type and is not yet defined">;
+def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note<
+  "%select{|member|base class}0 %1 declared here">;
 def note_defaulted_comparison_cannot_deduce_callee : Note<
   "selected 'operator<=>' for %select{|member|base class}0 %1 declared here">;
 def err_incorrect_defaulted_comparison_constexpr : Error<
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 9387285518de..d1f5ec5a3d4c 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -859,6 +859,8 @@ def detailed_preprocessing_record : Flag<["-"], "detailed-preprocessing-record">
   HelpText<"include a detailed record of preprocessing actions">;
 def setup_static_analyzer : Flag<["-"], "setup-static-analyzer">,
   HelpText<"Set up preprocessor for static analyzer (done automatically when static analyzer is run).">;
+def disable_pragma_debug_crash : Flag<["-"], "disable-pragma-debug-crash">,
+  HelpText<"Disable any #pragma clang __debug that can lead to crashing behavior. This is meant for testing.">;
 
 //===----------------------------------------------------------------------===//
 // OpenCL Options
diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h
index 0765b3c67d4e..9a3cad23363b 100644
--- a/clang/include/clang/Driver/Job.h
+++ b/clang/include/clang/Driver/Job.h
@@ -55,9 +55,6 @@ class Command {
   /// The list of program arguments which are inputs.
   llvm::opt::ArgStringList InputFilenames;
 
-  /// Whether to print the input filenames when executing.
-  bool PrintInputFilenames = false;
-
   /// Response file name, if this command is set to use one, or nullptr
   /// otherwise
   const char *ResponseFile = nullptr;
@@ -86,6 +83,12 @@ class Command {
   void writeResponseFile(raw_ostream &OS) const;
 
 public:
+  /// Whether to print the input filenames when executing.
+  bool PrintInputFilenames = false;
+
+  /// Whether the command will be executed in this process or not.
+  bool InProcess = false;
+
   Command(const Action &Source, const Tool &Creator, const char *Executable,
           const llvm::opt::ArgStringList &Arguments,
           ArrayRef<InputInfo> Inputs);
@@ -128,9 +131,6 @@ public:
   /// Print a command argument, and optionally quote it.
   static void printArg(llvm::raw_ostream &OS, StringRef Arg, bool Quote);
 
-  /// Set whether to print the input filenames when executing.
-  void setPrintInputFilenames(bool P) { PrintInputFilenames = P; }
-
 protected:
   /// Optionally print the filenames to be compiled
   void PrintFileNames() const;
@@ -139,7 +139,9 @@ protected:
 /// Use the CC1 tool callback when available, to avoid creating a new process
 class CC1Command : public Command {
 public:
-  using Command::Command;
+  CC1Command(const Action &Source, const Tool &Creator, const char *Executable,
+             const llvm::opt::ArgStringList &Arguments,
+             ArrayRef<InputInfo> Inputs);
 
   void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote,
              CrashReportInfo *CrashInfo = nullptr) const override;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index abffbd03c3b4..8b2146059f85 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -189,6 +189,9 @@ public:
   /// Set up preprocessor for RunAnalysis action.
   bool SetUpStaticAnalyzer = false;
 
+  /// Prevents intended crashes when using #pragma clang __debug. For testing.
+  bool DisablePragmaDebugCrash = false;
+
 public:
   PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {}
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a88dd2814487..697d1911be8f 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6275,7 +6275,7 @@ public:
   /// \returns true if an error occurred and satisfaction could not be checked,
   /// false otherwise.
   bool CheckConstraintSatisfaction(
-      NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
+      const NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
       ArrayRef<TemplateArgument> TemplateArgs,
       SourceRange TemplateIDRange, ConstraintSatisfaction &Satisfaction);
 
@@ -6288,6 +6288,17 @@ public:
   bool CheckConstraintSatisfaction(const Expr *ConstraintExpr,
                                    ConstraintSatisfaction &Satisfaction);
 
+  /// Check whether the given function decl's trailing requires clause is
+  /// satisfied, if any. Returns false and updates Satisfaction with the
+  /// satisfaction verdict if successful, emits a diagnostic and returns true if
+  /// an error occurred and satisfaction could not be determined.
+  ///
+  /// \returns true if an error occurred, false otherwise.
+  bool CheckFunctionConstraints(const FunctionDecl *FD,
+                                ConstraintSatisfaction &Satisfaction,
+                                SourceLocation UsageLoc = SourceLocation());
+
+
   /// \brief Ensure that the given template arguments satisfy the constraints
   /// associated with the given template, emitting a diagnostic if they do not.
   ///
@@ -6986,7 +6997,7 @@ public:
   /// Get a template argument mapping the given template parameter to itself,
   /// e.g. for X in \c template<int X>, this would return an expression template
   /// argument referencing X.
-  TemplateArgumentLoc getIdentityTemplateArgumentLoc(Decl *Param,
+  TemplateArgumentLoc getIdentityTemplateArgumentLoc(NamedDecl *Param,
                                                      SourceLocation Location);
 
   void translateTemplateArguments(const ASTTemplateArgsPtr &In,
diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h
index 7fc42a4816ec..c5f9fc45612a 100644
--- a/clang/include/clang/Sema/SemaConcept.h
+++ b/clang/include/clang/Sema/SemaConcept.h
@@ -43,11 +43,15 @@ struct AtomicConstraint {
     if (ParameterMapping->size() != Other.ParameterMapping->size())
       return false;
 
-    for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I)
-      if (!C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument())
-               .structurallyEquals(C.getCanonicalTemplateArgument(
-                  (*Other.ParameterMapping)[I].getArgument())))
+    for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) {
+      llvm::FoldingSetNodeID IDA, IDB;
+      C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument())
+          .Profile(IDA, C);
+      C.getCanonicalTemplateArgument((*Other.ParameterMapping)[I].getArgument())
+          .Profile(IDB, C);
+      if (IDA != IDB)
         return false;
+    }
     return true;
   }
 
diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp
index c28a06bdf0b2..549088ad4a8a 100644
--- a/clang/lib/AST/ASTConcept.cpp
+++ b/clang/lib/AST/ASTConcept.cpp
@@ -59,8 +59,8 @@ ASTConstraintSatisfaction::Create(const ASTContext &C,
 }
 
 void ConstraintSatisfaction::Profile(
-    llvm::FoldingSetNodeID &ID, const ASTContext &C, NamedDecl *ConstraintOwner,
-    ArrayRef<TemplateArgument> TemplateArgs) {
+    llvm::FoldingSetNodeID &ID, const ASTContext &C,
+    const NamedDecl *ConstraintOwner, ArrayRef<TemplateArgument> TemplateArgs) {
   ID.AddPointer(ConstraintOwner);
   ID.AddInteger(TemplateArgs.size());
   for (auto &Arg : TemplateArgs)
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6d1db38e36cc..1be72efe4de8 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -756,12 +756,8 @@ canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC,
       NewConverted.push_back(Arg);
   }
   Expr *NewIDC = ConceptSpecializationExpr::Create(
-      C, NestedNameSpecifierLoc(), /*TemplateKWLoc=*/SourceLocation(),
-      CSE->getConceptNameInfo(), /*FoundDecl=*/CSE->getNamedConcept(),
-      CSE->getNamedConcept(),
-      // Actually canonicalizing a TemplateArgumentLoc is difficult so we
-      // simply omit the ArgsAsWritten
-      /*ArgsAsWritten=*/nullptr, NewConverted, nullptr);
+      C, CSE->getNamedConcept(), NewConverted, nullptr,
+      CSE->isInstantiationDependent(), CSE->containsUnexpandedParameterPack());
 
   if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
     NewIDC = new (C) CXXFoldExpr(OrigFold->getType(), SourceLocation(), NewIDC,
diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp
index a3a3794b2edd..0377bd324cb6 100644
--- a/clang/lib/AST/CXXInheritance.cpp
+++ b/clang/lib/AST/CXXInheritance.cpp
@@ -758,6 +758,8 @@ CXXRecordDecl::getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const {
         return false;
       };
 
+      // FIXME: IsHidden reads from Overriding from the middle of a remove_if
+      // over the same sequence! Is this guaranteed to work?
       Overriding.erase(
           std::remove_if(Overriding.begin(), Overriding.end(), IsHidden),
           Overriding.end());
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 48e310e858b2..227fe80ccab4 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2038,17 +2038,36 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
   if (auto *MD = getCorrespondingMethodDeclaredInClass(RD, MayBeBase))
     return MD;
 
+  llvm::SmallVector<CXXMethodDecl*, 4> FinalOverriders;
+  auto AddFinalOverrider = [&](CXXMethodDecl *D) {
+    // If this function is overridden by a candidate final overrider, it is not
+    // a final overrider.
+    for (CXXMethodDecl *OtherD : FinalOverriders) {
+      if (declaresSameEntity(D, OtherD) || recursivelyOverrides(OtherD, D))
+        return;
+    }
+
+    // Other candidate final overriders might be overridden by this function.
+    FinalOverriders.erase(
+        std::remove_if(FinalOverriders.begin(), FinalOverriders.end(),
+                       [&](CXXMethodDecl *OtherD) {
+                         return recursivelyOverrides(D, OtherD);
+                       }),
+        FinalOverriders.end());
+
+    FinalOverriders.push_back(D);
+  };
+
   for (const auto &I : RD->bases()) {
     const RecordType *RT = I.getType()->getAs<RecordType>();
     if (!RT)
       continue;
     const auto *Base = cast<CXXRecordDecl>(RT->getDecl());
-    CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base);
-    if (T)
-      return T;
+    if (CXXMethodDecl *D = this->getCorrespondingMethodInClass(Base))
+      AddFinalOverrider(D);
   }
 
-  return nullptr;
+  return FinalOverriders.size() == 1 ? FinalOverriders.front() : nullptr;
 }
 
 CXXMethodDecl *CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
@@ -2105,6 +2124,11 @@ CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
   CXXMethodDecl *DevirtualizedMethod =
       getCorrespondingMethodInClass(BestDynamicDecl);
 
+  // If the final overrider in the dynamic type is ambiguous, we can't
+  // devirtualize this call.
+  if (!DevirtualizedMethod)
+    return nullptr;
+
   // If that method is pure virtual, we can't devirtualize. If this code is
   // reached, the result would be UB, not a direct call to the derived class
   // function, and we can't assume the derived class function is defined.
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 835198958766..fea7d606f261 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1685,6 +1685,11 @@ MemberExpr *MemberExpr::Create(
     CXXRecordDecl *RD = dyn_cast_or_null<CXXRecordDecl>(DC);
     if (RD && RD->isDependentContext() && RD->isCurrentInstantiation(DC))
       E->setTypeDependent(T->isDependentType());
+
+    // Bitfield with value-dependent width is type-dependent.
+    FieldDecl *FD = dyn_cast<FieldDecl>(MemberDecl);
+    if (FD && FD->isBitField() && FD->getBitWidth()->isValueDependent())
+      E->setTypeDependent(true);
   }
 
   if (HasQualOrFound) {
diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp
index 76d57ed5d5b1..b5a3686dc99a 100644
--- a/clang/lib/AST/ExprConcepts.cpp
+++ b/clang/lib/AST/ExprConcepts.cpp
@@ -46,24 +46,12 @@ ConceptSpecializationExpr::ConceptSpecializationExpr(const ASTContext &C,
                    ASTConstraintSatisfaction::Create(C, *Satisfaction) :
                    nullptr) {
   setTemplateArguments(ConvertedArgs);
-}
-
-ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty,
-    unsigned NumTemplateArgs)
-    : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(),
-      NumTemplateArgs(NumTemplateArgs) { }
-
-void ConceptSpecializationExpr::setTemplateArguments(
-    ArrayRef<TemplateArgument> Converted) {
-  assert(Converted.size() == NumTemplateArgs);
-  std::uninitialized_copy(Converted.begin(), Converted.end(),
-                          getTrailingObjects<TemplateArgument>());
   bool IsInstantiationDependent = false;
   bool ContainsUnexpandedParameterPack = false;
-  for (const TemplateArgument& Arg : Converted) {
-    if (Arg.isInstantiationDependent())
+  for (const TemplateArgumentLoc& ArgLoc : ArgsAsWritten->arguments()) {
+    if (ArgLoc.getArgument().isInstantiationDependent())
       IsInstantiationDependent = true;
-    if (Arg.containsUnexpandedParameterPack())
+    if (ArgLoc.getArgument().containsUnexpandedParameterPack())
       ContainsUnexpandedParameterPack = true;
     if (ContainsUnexpandedParameterPack && IsInstantiationDependent)
       break;
@@ -80,6 +68,18 @@ void ConceptSpecializationExpr::setTemplateArguments(
          "should not be value-dependent");
 }
 
+ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty,
+    unsigned NumTemplateArgs)
+    : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(),
+      NumTemplateArgs(NumTemplateArgs) { }
+
+void ConceptSpecializationExpr::setTemplateArguments(
+    ArrayRef<TemplateArgument> Converted) {
+  assert(Converted.size() == NumTemplateArgs);
+  std::uninitialized_copy(Converted.begin(), Converted.end(),
+                          getTrailingObjects<TemplateArgument>());
+}
+
 ConceptSpecializationExpr *
 ConceptSpecializationExpr::Create(const ASTContext &C,
                                   NestedNameSpecifierLoc NNS,
@@ -98,6 +98,39 @@ ConceptSpecializationExpr::Create(const ASTContext &C,
                                                 ConvertedArgs, Satisfaction);
 }
 
+ConceptSpecializationExpr::ConceptSpecializationExpr(
+    const ASTContext &C, ConceptDecl *NamedConcept,
+    ArrayRef<TemplateArgument> ConvertedArgs,
+    const ConstraintSatisfaction *Satisfaction, bool Dependent,
+    bool ContainsUnexpandedParameterPack)
+    : Expr(ConceptSpecializationExprClass, C.BoolTy, VK_RValue, OK_Ordinary,
+           /*TypeDependent=*/false,
+           /*ValueDependent=*/!Satisfaction, Dependent,
+           ContainsUnexpandedParameterPack),
+      ConceptReference(NestedNameSpecifierLoc(), SourceLocation(),
+                       DeclarationNameInfo(), NamedConcept,
+                       NamedConcept, nullptr),
+      NumTemplateArgs(ConvertedArgs.size()),
+      Satisfaction(Satisfaction ?
+                   ASTConstraintSatisfaction::Create(C, *Satisfaction) :
+                   nullptr) {
+  setTemplateArguments(ConvertedArgs);
+}
+
+ConceptSpecializationExpr *
+ConceptSpecializationExpr::Create(const ASTContext &C,
+                                  ConceptDecl *NamedConcept,
+                                  ArrayRef<TemplateArgument> ConvertedArgs,
+                                  const ConstraintSatisfaction *Satisfaction,
+                                  bool Dependent,
+                                  bool ContainsUnexpandedParameterPack) {
+  void *Buffer = C.Allocate(totalSizeToAlloc<TemplateArgument>(
+                                ConvertedArgs.size()));
+  return new (Buffer) ConceptSpecializationExpr(
+      C, NamedConcept, ConvertedArgs, Satisfaction, Dependent,
+      ContainsUnexpandedParameterPack);
+}
+
 ConceptSpecializationExpr *
 ConceptSpecializationExpr::Create(ASTContext &C, EmptyShell Empty,
                                   unsigned NumTemplateArgs) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 382ea5c8d7ef..60dec50d53da 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1535,8 +1535,8 @@ static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S,
     return Stmt::BinaryOperatorClass;
 
   case OO_Spaceship:
-    // FIXME: Update this once we support <=> expressions.
-    llvm_unreachable("<=> expressions not supported yet");
+    BinaryOp = BO_Cmp;
+    return Stmt::BinaryOperatorClass;
 
   case OO_AmpAmp:
     BinaryOp = BO_LAnd;
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index f2b6c8cd3ee9..e06d120c58bf 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -2,6 +2,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/VersionTuple.h"
 
@@ -31,8 +32,8 @@ const char *CudaVersionToString(CudaVersion V) {
   llvm_unreachable("invalid enum");
 }
 
-CudaVersion CudaStringToVersion(llvm::StringRef S) {
-  return llvm::StringSwitch<CudaVersion>(S)
+CudaVersion CudaStringToVersion(const llvm::Twine &S) {
+  return llvm::StringSwitch<CudaVersion>(S.str())
       .Case("7.0", CudaVersion::CUDA_70)
       .Case("7.5", CudaVersion::CUDA_75)
       .Case("8.0", CudaVersion::CUDA_80)
@@ -40,7 +41,8 @@ CudaVersion CudaStringToVersion(llvm::StringRef S) {
       .Case("9.1", CudaVersion::CUDA_91)
       .Case("9.2", CudaVersion::CUDA_92)
       .Case("10.0", CudaVersion::CUDA_100)
-      .Case("10.1", CudaVersion::CUDA_101);
+      .Case("10.1", CudaVersion::CUDA_101)
+      .Default(CudaVersion::UNKNOWN);
 }
 
 const char *CudaArchToString(CudaArch A) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 57beda26677c..f8866ac4f7f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -537,6 +537,13 @@ void CodeGenModule::Release() {
     getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
   }
 
+  if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) {
+    StringRef ABIStr = Target.getABI();
+    llvm::LLVMContext &Ctx = TheModule.getContext();
+    getModule().addModuleFlag(llvm::Module::Error, "target-abi",
+                              llvm::MDString::get(Ctx, ABIStr));
+  }
+
   if (CodeGenOpts.SanitizeCfiCrossDso) {
     // Indicate that we want cross-DSO control flow integrity checks.
     getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 25aec3690f21..52477576b2eb 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -258,14 +258,23 @@ void Compilation::initCompilationForDiagnostics() {
 
   // Remove any user specified output.  Claim any unclaimed arguments, so as
   // to avoid emitting warnings about unused args.
-  OptSpecifier OutputOpts[] = { options::OPT_o, options::OPT_MD,
-                                options::OPT_MMD };
+  OptSpecifier OutputOpts[] = {
+      options::OPT_o,  options::OPT_MD, options::OPT_MMD, options::OPT_M,
+      options::OPT_MM, options::OPT_MF, options::OPT_MG,  options::OPT_MJ,
+      options::OPT_MQ, options::OPT_MT, options::OPT_MV};
   for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) {
     if (TranslatedArgs->hasArg(OutputOpts[i]))
       TranslatedArgs->eraseArg(OutputOpts[i]);
   }
   TranslatedArgs->ClaimAllArgs();
 
+  // Force re-creation of the toolchain Args, otherwise our modifications just
+  // above will have no effect.
+  for (auto Arg : TCArgs)
+    if (Arg.second != TranslatedArgs)
+      delete Arg.second;
+  TCArgs.clear();
+
   // Redirect stdout/stderr to /dev/null.
   Redirects = {None, {""}, {""}};
 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 7ee3caaa0bce..fb8335a3695d 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3757,6 +3757,11 @@ void Driver::BuildJobs(Compilation &C) const {
                        /*TargetDeviceOffloadKind*/ Action::OFK_None);
   }
 
+  // If we have more than one job, then disable integrated-cc1 for now.
+  if (C.getJobs().size() > 1)
+    for (auto &J : C.getJobs())
+      J.InProcess = false;
+
   // If the user passed -Qunused-arguments or there were errors, don't warn
   // about any unused arguments.
   if (Diags.hasErrorOccurred() ||
diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp
index 7dab2a022d92..6d1e7e61ba1d 100644
--- a/clang/lib/Driver/Job.cpp
+++ b/clang/lib/Driver/Job.cpp
@@ -371,14 +371,29 @@ int Command::Execute(ArrayRef<llvm::Optional<StringRef>> Redirects,
                                    /*memoryLimit*/ 0, ErrMsg, ExecutionFailed);
 }
 
+CC1Command::CC1Command(const Action &Source, const Tool &Creator,
+                       const char *Executable,
+                       const llvm::opt::ArgStringList &Arguments,
+                       ArrayRef<InputInfo> Inputs)
+    : Command(Source, Creator, Executable, Arguments, Inputs) {
+  InProcess = true;
+}
+
 void CC1Command::Print(raw_ostream &OS, const char *Terminator, bool Quote,
                        CrashReportInfo *CrashInfo) const {
-  OS << " (in-process)\n";
+  if (InProcess)
+    OS << " (in-process)\n";
   Command::Print(OS, Terminator, Quote, CrashInfo);
 }
 
-int CC1Command::Execute(ArrayRef<llvm::Optional<StringRef>> /*Redirects*/,
+int CC1Command::Execute(ArrayRef<llvm::Optional<StringRef>> Redirects,
                         std::string *ErrMsg, bool *ExecutionFailed) const {
+  // FIXME: Currently, if there is more than one job, we disable
+  // -fintegrate-cc1. If we're no longer an integrated-cc1 job, fall back to
+  // out-of-process execution. See discussion in https://reviews.llvm.org/D74447
+  if (!InProcess)
+    return Command::Execute(Redirects, ErrMsg, ExecutionFailed);
+
   PrintFileNames();
 
   SmallVector<const char *, 128> Argv;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 647465863d3e..aec1971214cf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4679,6 +4679,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                                  : "-");
   }
 
+  // Give the gen diagnostics more chances to succeed, by avoiding intentional
+  // crashes.
+  if (D.CCGenDiagnostics)
+    CmdArgs.push_back("-disable-pragma-debug-crash");
+
   bool UseSeparateSections = isUseSeparateSections(Triple);
 
   if (Args.hasFlag(options::OPT_ffunction_sections,
@@ -6048,7 +6053,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Output.getType() == types::TY_Object &&
       Args.hasFlag(options::OPT__SLASH_showFilenames,
                    options::OPT__SLASH_showFilenames_, false)) {
-    C.getJobs().getJobs().back()->setPrintInputFilenames(true);
+    C.getJobs().getJobs().back()->PrintInputFilenames = true;
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_pg))
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 02871d2ce411..8a7da4f86b39 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -32,37 +32,24 @@ using namespace llvm::opt;
 
 // Parses the contents of version.txt in an CUDA installation.  It should
 // contain one line of the from e.g. "CUDA Version 7.5.2".
-static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
+static CudaVersion ParseCudaVersionFile(const Driver &D, llvm::StringRef V) {
   if (!V.startswith("CUDA Version "))
     return CudaVersion::UNKNOWN;
   V = V.substr(strlen("CUDA Version "));
-  int Major = -1, Minor = -1;
-  auto First = V.split('.');
-  auto Second = First.second.split('.');
-  if (First.first.getAsInteger(10, Major) ||
-      Second.first.getAsInteger(10, Minor))
+  SmallVector<StringRef,4> VersionParts;
+  V.split(VersionParts, '.');
+  if (VersionParts.size() < 2)
     return CudaVersion::UNKNOWN;
-
-  if (Major == 7 && Minor == 0) {
-    // This doesn't appear to ever happen -- version.txt doesn't exist in the
-    // CUDA 7 installs I've seen.  But no harm in checking.
-    return CudaVersion::CUDA_70;
-  }
-  if (Major == 7 && Minor == 5)
-    return CudaVersion::CUDA_75;
-  if (Major == 8 && Minor == 0)
-    return CudaVersion::CUDA_80;
-  if (Major == 9 && Minor == 0)
-    return CudaVersion::CUDA_90;
-  if (Major == 9 && Minor == 1)
-    return CudaVersion::CUDA_91;
-  if (Major == 9 && Minor == 2)
-    return CudaVersion::CUDA_92;
-  if (Major == 10 && Minor == 0)
-    return CudaVersion::CUDA_100;
-  if (Major == 10 && Minor == 1)
-    return CudaVersion::CUDA_101;
-  return CudaVersion::UNKNOWN;
+  std::string MajorMinor = join_items(".", VersionParts[0], VersionParts[1]);
+  CudaVersion Version = CudaStringToVersion(MajorMinor);
+  if (Version != CudaVersion::UNKNOWN)
+    return Version;
+
+  // Issue a warning and assume that the version we've found is compatible with
+  // the latest version we support.
+  D.Diag(diag::warn_drv_unknown_cuda_version)
+      << MajorMinor << CudaVersionToString(CudaVersion::LATEST);
+  return CudaVersion::LATEST;
 }
 
 CudaInstallationDetector::CudaInstallationDetector(
@@ -160,7 +147,7 @@ CudaInstallationDetector::CudaInstallationDetector(
       // version.txt isn't present.
       Version = CudaVersion::CUDA_70;
     } else {
-      Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
+      Version = ParseCudaVersionFile(D, (*VersionFile)->getBuffer());
     }
 
     if (Version >= CudaVersion::CUDA_90) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4e5babdbaa03..e98a407ac42f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3440,6 +3440,7 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
     Opts.LexEditorPlaceholders = false;
 
   Opts.SetUpStaticAnalyzer = Args.hasArg(OPT_setup_static_analyzer);
+  Opts.DisablePragmaDebugCrash = Args.hasArg(OPT_disable_pragma_debug_crash);
 }
 
 static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index b67461a146fc..c7bff6a9d8fe 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -45,7 +45,7 @@
     _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
     _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
     __Bits __tmp;                                                              \
-    memcpy(&__val, &__tmp, sizeof(__val));                                     \
+    memcpy(&__tmp, &__val, sizeof(__val));                                \
     __tmp.__a = ::__FnName(__tmp.__a, __offset, __width);                      \
     __tmp.__b = ::__FnName(__tmp.__b, __offset, __width);                      \
     long long __ret;                                                           \
@@ -129,7 +129,7 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
     _Static_assert(sizeof(__val) == sizeof(__Bits));                           \
     _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \
     __Bits __tmp;                                                              \
-    memcpy(&__val, &__tmp, sizeof(__val));                                     \
+    memcpy(&__tmp, &__val, sizeof(__val));                                     \
     __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width);              \
     __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width);              \
     long long __ret;                                                           \
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 3e362dd967db..e91de3c81dbd 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -48,7 +48,7 @@
 #include "cuda.h"
 #if !defined(CUDA_VERSION)
 #error "cuda.h did not define CUDA_VERSION"
-#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010
+#elif CUDA_VERSION < 7000
 #error "Unsupported CUDA version!"
 #endif
 
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 0e61eab44aeb..9b8de63f04d5 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2181,7 +2181,7 @@ void _mm_sfence(void);
 ///    3: Bits [63:48] are copied to the destination.
 /// \returns A 16-bit integer containing the extracted 16 bits of packed data.
 #define _mm_extract_pi16(a, n) \
-  (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)
+  (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
 
 /// Copies data from the 64-bit vector of [4 x i16] to the destination,
 ///    and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2212,7 @@ void _mm_sfence(void);
 /// \returns A 64-bit integer vector containing the copied packed data from the
 ///    operands.
 #define _mm_insert_pi16(a, d, n) \
-  (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)
+  (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
 
 /// Compares each of the corresponding packed 16-bit integer values of
 ///    the 64-bit integer vectors, and writes the greater value to the
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 648bda270578..981111d03744 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2552,8 +2552,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
         '/', '/', '/', '/',  '/', '/', '/', '/',
         '/', '/', '/', '/',  '/', '/', '/', '/'
       };
-      while (CurPtr+16 <= BufferEnd &&
-             !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes))
+      while (CurPtr + 16 <= BufferEnd &&
+             !vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes))
         CurPtr += 16;
 #else
       // Scan for '/' quickly.  Many block comments are very large.
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index e4636265a72b..a8cd18b123b0 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -30,6 +30,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorLexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Lex/Token.h"
 #include "clang/Lex/TokenLexer.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -39,7 +40,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
@@ -1035,15 +1035,19 @@ struct PragmaDebugHandler : public PragmaHandler {
     IdentifierInfo *II = Tok.getIdentifierInfo();
 
     if (II->isStr("assert")) {
-      llvm_unreachable("This is an assertion!");
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+        llvm_unreachable("This is an assertion!");
     } else if (II->isStr("crash")) {
-      LLVM_BUILTIN_TRAP;
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+        LLVM_BUILTIN_TRAP;
     } else if (II->isStr("parser_crash")) {
-      Token Crasher;
-      Crasher.startToken();
-      Crasher.setKind(tok::annot_pragma_parser_crash);
-      Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
-      PP.EnterToken(Crasher, /*IsReinject*/false);
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) {
+        Token Crasher;
+        Crasher.startToken();
+        Crasher.setKind(tok::annot_pragma_parser_crash);
+        Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
+        PP.EnterToken(Crasher, /*IsReinject*/ false);
+      }
     } else if (II->isStr("dump")) {
       Token Identifier;
       PP.LexUnexpandedToken(Identifier);
@@ -1075,9 +1079,11 @@ struct PragmaDebugHandler : public PragmaHandler {
             << II->getName();
       }
     } else if (II->isStr("llvm_fatal_error")) {
-      llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+        llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
     } else if (II->isStr("llvm_unreachable")) {
-      llvm_unreachable("#pragma clang __debug llvm_unreachable");
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+        llvm_unreachable("#pragma clang __debug llvm_unreachable");
     } else if (II->isStr("macro")) {
       Token MacroName;
       PP.LexUnexpandedToken(MacroName);
@@ -1104,11 +1110,8 @@ struct PragmaDebugHandler : public PragmaHandler {
       }
       M->dump();
     } else if (II->isStr("overflow_stack")) {
-      DebugOverflowStack();
-    } else if (II->isStr("handle_crash")) {
-      llvm::CrashRecoveryContext *CRC =llvm::CrashRecoveryContext::GetCurrent();
-      if (CRC)
-        CRC->HandleCrash();
+      if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash)
+        DebugOverflowStack();
     } else if (II->isStr("captured")) {
       HandleCaptured(PP);
     } else {
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 4af993c4527f..cdc3506d5c68 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5060,6 +5060,8 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
     // recurse to handle whatever we get.
     if (TryAnnotateTypeOrScopeToken())
       return true;
+    if (TryAnnotateTypeConstraint())
+      return true;
     if (Tok.is(tok::identifier))
       return false;
 
@@ -5192,11 +5194,14 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
 
     // placeholder-type-specifier
   case tok::annot_template_id: {
-    TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
-    return TemplateId->Kind == TNK_Concept_template &&
+    return isTypeConstraintAnnotation() &&
         (NextToken().is(tok::kw_auto) || NextToken().is(tok::kw_decltype));
   }
-
+  case tok::annot_cxxscope:
+    if (NextToken().is(tok::identifier) && TryAnnotateTypeConstraint())
+      return true;
+    return isTypeConstraintAnnotation() &&
+        GetLookAheadToken(2).isOneOf(tok::kw_auto, tok::kw_decltype);
   case tok::kw___declspec:
   case tok::kw___cdecl:
   case tok::kw___stdcall:
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index f872aa3a950c..09e5c7996fcd 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -2716,7 +2716,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
     // C++11 [dcl.attr.grammar] p4: If an attribute-specifier-seq appertains
     // to a friend declaration, that declaration shall be a definition.
     if (DeclaratorInfo.isFunctionDeclarator() &&
-        DefinitionKind != FDK_Definition && DS.isFriendSpecified()) {
+        DefinitionKind == FDK_Declaration && DS.isFriendSpecified()) {
       // Diagnose attributes that appear before decl specifier:
       // [[]] friend int foo();
       ProhibitAttributes(FnAttrs);
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 036eabb94dd7..17f81ec96c1f 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -3374,25 +3374,6 @@ ExprResult Parser::ParseRequiresExpression() {
           Diag(Tok, diag::err_requires_expr_missing_arrow)
               << FixItHint::CreateInsertion(Tok.getLocation(), "->");
         // Try to parse a 'type-constraint'
-        CXXScopeSpec SS;
-        if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
-                                           /*EnteringContext=*/false,
-                                           /*MayBePseudoDestructor=*/nullptr,
-                                           // If this is not a type-constraint,
-                                           // then this scope-spec is part of
-                                           // the typename of a non-type
-                                           // template parameter
-                                           /*IsTypename=*/true,
-                                           /*LastII=*/nullptr,
-                                           // We won't find concepts in
-                                           // non-namespaces anyway, so might as
-                                           // well parse this correctly for
-                                           // possible type names.
-                                           /*OnlyNamespace=*/false,
-                                           /*SuppressDiagnostic=*/true)) {
-          SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
-          break;
-        }
         if (TryAnnotateTypeConstraint()) {
           SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
           break;
@@ -3402,8 +3383,13 @@ ExprResult Parser::ParseRequiresExpression() {
           SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
           break;
         }
-        if (Tok.is(tok::annot_cxxscope))
+        CXXScopeSpec SS;
+        if (Tok.is(tok::annot_cxxscope)) {
+          Actions.RestoreNestedNameSpecifierAnnotation(Tok.getAnnotationValue(),
+                                                       Tok.getAnnotationRange(),
+                                                       SS);
           ConsumeAnnotationToken();
+        }
 
         Req = Actions.ActOnCompoundRequirement(
             Expression.get(), NoexceptLoc, SS, takeTemplateIdAnnotation(Tok),
@@ -3490,6 +3476,7 @@ ExprResult Parser::ParseRequiresExpression() {
           // We need to consume the typename to allow 'requires { typename a; }'
           SourceLocation TypenameKWLoc = ConsumeToken();
           if (TryAnnotateCXXScopeToken()) {
+            TPA.Commit();
             SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
             break;
           }
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index a905ebc67305..7a8cbca1e3f1 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2311,6 +2311,24 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
     return SuccessResult;
   }
 
+  // Diagnose address space conversion in nested pointers.
+  QualType DestPtee = DestType->getPointeeType().isNull()
+                          ? DestType->getPointeeType()
+                          : DestType->getPointeeType()->getPointeeType();
+  QualType SrcPtee = SrcType->getPointeeType().isNull()
+                         ? SrcType->getPointeeType()
+                         : SrcType->getPointeeType()->getPointeeType();
+  while (!DestPtee.isNull() && !SrcPtee.isNull()) {
+    if (DestPtee.getAddressSpace() != SrcPtee.getAddressSpace()) {
+      Self.Diag(OpRange.getBegin(),
+                diag::warn_bad_cxx_cast_nested_pointer_addr_space)
+          << CStyle << SrcType << DestType << SrcExpr.get()->getSourceRange();
+      break;
+    }
+    DestPtee = DestPtee->getPointeeType();
+    SrcPtee = SrcPtee->getPointeeType();
+  }
+
   // C++ 5.2.10p7: A pointer to an object can be explicitly converted to
   //   a pointer to an object of different type.
   // Void pointers are not specified, but supported by every compiler out there.
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 81601b09ce0d..290e4cbff4fd 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -167,9 +167,8 @@ calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr,
   return false;
 }
 
-template <typename TemplateDeclT>
 static bool calculateConstraintSatisfaction(
-    Sema &S, TemplateDeclT *Template, ArrayRef<TemplateArgument> TemplateArgs,
+    Sema &S, const NamedDecl *Template, ArrayRef<TemplateArgument> TemplateArgs,
     SourceLocation TemplateNameLoc, MultiLevelTemplateArgumentList &MLTAL,
     const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) {
   return calculateConstraintSatisfaction(
@@ -182,8 +181,9 @@ static bool calculateConstraintSatisfaction(
         {
           TemplateDeductionInfo Info(TemplateNameLoc);
           Sema::InstantiatingTemplate Inst(S, AtomicExpr->getBeginLoc(),
-              Sema::InstantiatingTemplate::ConstraintSubstitution{}, Template,
-              Info, AtomicExpr->getSourceRange());
+              Sema::InstantiatingTemplate::ConstraintSubstitution{},
+              const_cast<NamedDecl *>(Template), Info,
+              AtomicExpr->getSourceRange());
           if (Inst.isInvalid())
             return ExprError();
           // We do not want error diagnostics escaping here.
@@ -230,8 +230,7 @@ static bool calculateConstraintSatisfaction(
       });
 }
 
-template<typename TemplateDeclT>
-static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
+static bool CheckConstraintSatisfaction(Sema &S, const NamedDecl *Template,
                                         ArrayRef<const Expr *> ConstraintExprs,
                                         ArrayRef<TemplateArgument> TemplateArgs,
                                         SourceRange TemplateIDRange,
@@ -249,8 +248,8 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
     }
 
   Sema::InstantiatingTemplate Inst(S, TemplateIDRange.getBegin(),
-      Sema::InstantiatingTemplate::ConstraintsCheck{}, Template, TemplateArgs,
-      TemplateIDRange);
+      Sema::InstantiatingTemplate::ConstraintsCheck{},
+      const_cast<NamedDecl *>(Template), TemplateArgs, TemplateIDRange);
   if (Inst.isInvalid())
     return true;
 
@@ -273,7 +272,7 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
 }
 
 bool Sema::CheckConstraintSatisfaction(
-    NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
+    const NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
     ArrayRef<TemplateArgument> TemplateArgs, SourceRange TemplateIDRange,
     ConstraintSatisfaction &OutSatisfaction) {
   if (ConstraintExprs.empty()) {
@@ -284,7 +283,8 @@ bool Sema::CheckConstraintSatisfaction(
   llvm::FoldingSetNodeID ID;
   void *InsertPos;
   ConstraintSatisfaction *Satisfaction = nullptr;
-  if (LangOpts.ConceptSatisfactionCaching) {
+  bool ShouldCache = LangOpts.ConceptSatisfactionCaching && Template;
+  if (ShouldCache) {
     ConstraintSatisfaction::Profile(ID, Context, Template, TemplateArgs);
     Satisfaction = SatisfactionCache.FindNodeOrInsertPos(ID, InsertPos);
     if (Satisfaction) {
@@ -295,27 +295,15 @@ bool Sema::CheckConstraintSatisfaction(
   } else {
     Satisfaction = &OutSatisfaction;
   }
-  bool Failed;
-  if (auto *T = dyn_cast<TemplateDecl>(Template))
-    Failed = ::CheckConstraintSatisfaction(*this, T, ConstraintExprs,
-                                           TemplateArgs, TemplateIDRange,
-                                           *Satisfaction);
-  else if (auto *P =
-               dyn_cast<ClassTemplatePartialSpecializationDecl>(Template))
-    Failed = ::CheckConstraintSatisfaction(*this, P, ConstraintExprs,
-                                           TemplateArgs, TemplateIDRange,
-                                           *Satisfaction);
-  else
-    Failed = ::CheckConstraintSatisfaction(
-        *this, cast<VarTemplatePartialSpecializationDecl>(Template),
-        ConstraintExprs, TemplateArgs, TemplateIDRange, *Satisfaction);
-  if (Failed) {
-    if (LangOpts.ConceptSatisfactionCaching)
+  if (::CheckConstraintSatisfaction(*this, Template, ConstraintExprs,
+                                    TemplateArgs, TemplateIDRange,
+                                    *Satisfaction)) {
+    if (ShouldCache)
       delete Satisfaction;
     return true;
   }
 
-  if (LangOpts.ConceptSatisfactionCaching) {
+  if (ShouldCache) {
     // We cannot use InsertNode here because CheckConstraintSatisfaction might
     // have invalidated it.
     SatisfactionCache.InsertNode(Satisfaction);
@@ -333,6 +321,30 @@ bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr,
       });
 }
 
+bool Sema::CheckFunctionConstraints(const FunctionDecl *FD,
+                                    ConstraintSatisfaction &Satisfaction,
+                                    SourceLocation UsageLoc) {
+  const Expr *RC = FD->getTrailingRequiresClause();
+  if (RC->isInstantiationDependent()) {
+    Satisfaction.IsSatisfied = true;
+    return false;
+  }
+  Qualifiers ThisQuals;
+  CXXRecordDecl *Record = nullptr;
+  if (auto *Method = dyn_cast<CXXMethodDecl>(FD)) {
+    ThisQuals = Method->getMethodQualifiers();
+    Record = const_cast<CXXRecordDecl *>(Method->getParent());
+  }
+  CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
+  // We substitute with empty arguments in order to rebuild the atomic
+  // constraint in a constant-evaluated context.
+  // FIXME: Should this be a dedicated TreeTransform?
+  return CheckConstraintSatisfaction(
+      FD, {RC}, /*TemplateArgs=*/{},
+      SourceRange(UsageLoc.isValid() ? UsageLoc : FD->getLocation()),
+      Satisfaction);
+}
+
 bool Sema::EnsureTemplateArgumentListConstraints(
     TemplateDecl *TD, ArrayRef<TemplateArgument> TemplateArgs,
     SourceRange TemplateIDRange) {
@@ -671,6 +683,10 @@ static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N,
                   ArgsAsWritten->arguments().back().getSourceRange().getEnd()));
   if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs))
     return true;
+  Atomic.ParameterMapping.emplace(
+        MutableArrayRef<TemplateArgumentLoc>(
+            new (S.Context) TemplateArgumentLoc[SubstArgs.size()],
+            SubstArgs.size()));
   std::copy(SubstArgs.arguments().begin(), SubstArgs.arguments().end(),
             N.getAtomicConstraint()->ParameterMapping->begin());
   return false;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0bf490336537..64146f4a912f 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12526,6 +12526,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
       var->getDeclContext()->getRedeclContext()->isFileContext() &&
       var->isExternallyVisible() && var->hasLinkage() &&
       !var->isInline() && !var->getDescribedVarTemplate() &&
+      !isa<VarTemplatePartialSpecializationDecl>(var) &&
       !isTemplateInstantiation(var->getTemplateSpecializationKind()) &&
       !getDiagnostics().isIgnored(diag::warn_missing_variable_declarations,
                                   var->getLocation())) {
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 9fa5691983a1..831e55046e80 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -7373,7 +7373,14 @@ private:
     ///   resolution [...]
     CandidateSet.exclude(FD);
 
-    S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args);
+    if (Args[0]->getType()->isOverloadableType())
+      S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args);
+    else {
+      // FIXME: We determine whether this is a valid expression by checking to
+      // see if there's a viable builtin operator candidate for it. That isn't
+      // really what the rules ask us to do, but should give the right results.
+      S.AddBuiltinOperatorCandidates(OO, FD->getLocation(), Args, CandidateSet);
+    }
 
     Result R;
 
@@ -7438,6 +7445,31 @@ private:
 
       if (OO == OO_Spaceship && FD->getReturnType()->isUndeducedAutoType()) {
         if (auto *BestFD = Best->Function) {
+          // If any callee has an undeduced return type, deduce it now.
+          // FIXME: It's not clear how a failure here should be handled. For
+          // now, we produce an eager diagnostic, because that is forward
+          // compatible with most (all?) other reasonable options.
+          if (BestFD->getReturnType()->isUndeducedType() &&
+              S.DeduceReturnType(BestFD, FD->getLocation(),
+                                 /*Diagnose=*/false)) {
+            // Don't produce a duplicate error when asked to explain why the
+            // comparison is deleted: we diagnosed that when initially checking
+            // the defaulted operator.
+            if (Diagnose == NoDiagnostics) {
+              S.Diag(
+                  FD->getLocation(),
+                  diag::err_defaulted_comparison_cannot_deduce_undeduced_auto)
+                  << Subobj.Kind << Subobj.Decl;
+              S.Diag(
+                  Subobj.Loc,
+                  diag::note_defaulted_comparison_cannot_deduce_undeduced_auto)
+                  << Subobj.Kind << Subobj.Decl;
+              S.Diag(BestFD->getLocation(),
+                     diag::note_defaulted_comparison_cannot_deduce_callee)
+                  << Subobj.Kind << Subobj.Decl;
+            }
+            return Result::deleted();
+          }
           if (auto *Info = S.Context.CompCategories.lookupInfoForType(
               BestFD->getCallResultType())) {
             R.Category = Info->Kind;
@@ -7826,10 +7858,14 @@ private:
       return StmtError();
 
     OverloadedOperatorKind OO = FD->getOverloadedOperator();
-    ExprResult Op = S.CreateOverloadedBinOp(
-        Loc, BinaryOperator::getOverloadedOpcode(OO), Fns,
-        Obj.first.get(), Obj.second.get(), /*PerformADL=*/true,
-        /*AllowRewrittenCandidates=*/true, FD);
+    BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(OO);
+    ExprResult Op;
+    if (Type->isOverloadableType())
+      Op = S.CreateOverloadedBinOp(Loc, Opc, Fns, Obj.first.get(),
+                                   Obj.second.get(), /*PerformADL=*/true,
+                                   /*AllowRewrittenCandidates=*/true, FD);
+    else
+      Op = S.CreateBuiltinBinOp(Loc, Opc, Obj.first.get(), Obj.second.get());
     if (Op.isInvalid())
       return StmtError();
 
@@ -7869,8 +7905,12 @@ private:
       llvm::APInt ZeroVal(S.Context.getIntWidth(S.Context.IntTy), 0);
       Expr *Zero =
           IntegerLiteral::Create(S.Context, ZeroVal, S.Context.IntTy, Loc);
-      ExprResult Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(),
-                                                Zero, true, true, FD);
+      ExprResult Comp;
+      if (VDRef.get()->getType()->isOverloadableType())
+        Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(), Zero, true,
+                                       true, FD);
+      else
+        Comp = S.CreateBuiltinBinOp(Loc, BO_NE, VDRef.get(), Zero);
       if (Comp.isInvalid())
         return StmtError();
       Sema::ConditionResult Cond = S.ActOnCondition(
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ea4b93ee6a5a..29562615e588 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -245,8 +245,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
     return true;
   }
 
-  // See if this is a deleted function.
   if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    // See if this is a deleted function.
     if (FD->isDeleted()) {
       auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
       if (Ctor && Ctor->isInheritingConstructor())
@@ -259,6 +259,29 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
       return true;
     }
 
+    // [expr.prim.id]p4
+    //   A program that refers explicitly or implicitly to a function with a
+    //   trailing requires-clause whose constraint-expression is not satisfied,
+    //   other than to declare it, is ill-formed. [...]
+    //
+    // See if this is a function with constraints that need to be satisfied.
+    // Check this before deducing the return type, as return type deduction
+    // might instantiate the definition.
+    if (FD->getTrailingRequiresClause()) {
+      ConstraintSatisfaction Satisfaction;
+      if (CheckFunctionConstraints(FD, Satisfaction, Loc))
+        // A diagnostic will have already been generated (non-constant
+        // constraint expression, for example)
+        return true;
+      if (!Satisfaction.IsSatisfied) {
+        Diag(Loc,
+             diag::err_reference_to_function_with_unsatisfied_constraints)
+            << D;
+        DiagnoseUnsatisfiedConstraint(Satisfaction);
+        return true;
+      }
+    }
+
     // If the function has a deduced return type, and we can't deduce it,
     // then we can't use it either.
     if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() &&
@@ -326,30 +349,6 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
 
   diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
 
-  // [expr.prim.id]p4
-  //   A program that refers explicitly or implicitly to a function with a
-  //   trailing requires-clause whose constraint-expression is not satisfied,
-  //   other than to declare it, is ill-formed. [...]
-  //
-  // See if this is a function with constraints that need to be satisfied.
-  if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
-    if (Expr *RC = FD->getTrailingRequiresClause()) {
-      ConstraintSatisfaction Satisfaction;
-      bool Failed = CheckConstraintSatisfaction(RC, Satisfaction);
-      if (Failed)
-        // A diagnostic will have already been generated (non-constant
-        // constraint expression, for example)
-        return true;
-      if (!Satisfaction.IsSatisfied) {
-        Diag(Loc,
-             diag::err_reference_to_function_with_unsatisfied_constraints)
-            << D;
-        DiagnoseUnsatisfiedConstraint(Satisfaction);
-        return true;
-      }
-    }
-  }
-
   if (isa<ParmVarDecl>(D) && isa<RequiresExprBodyDecl>(D->getDeclContext()) &&
       !isUnevaluatedContext()) {
     // C++ [expr.prim.req.nested] p3
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 192c237b6c1c..98af7fb73eca 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -8487,7 +8487,8 @@ concepts::NestedRequirement *
 Sema::BuildNestedRequirement(Expr *Constraint) {
   ConstraintSatisfaction Satisfaction;
   if (!Constraint->isInstantiationDependent() &&
-      CheckConstraintSatisfaction(Constraint, Satisfaction))
+      CheckConstraintSatisfaction(nullptr, {Constraint}, /*TemplateArgs=*/{},
+                                  Constraint->getSourceRange(), Satisfaction))
     return nullptr;
   return new (Context) concepts::NestedRequirement(Context, Constraint,
                                                    Satisfaction);
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 0fd932fac970..db1884acd349 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -3176,7 +3176,7 @@ static bool isNonTrivialObjCLifetimeConversion(Qualifiers FromQuals,
 /// FromType and \p ToType is permissible, given knowledge about whether every
 /// outer layer is const-qualified.
 static bool isQualificationConversionStep(QualType FromType, QualType ToType,
-                                          bool CStyle,
+                                          bool CStyle, bool IsTopLevel,
                                           bool &PreviousToQualsIncludeConst,
                                           bool &ObjCLifetimeConversion) {
   Qualifiers FromQuals = FromType.getQualifiers();
@@ -3213,11 +3213,15 @@ static bool isQualificationConversionStep(QualType FromType, QualType ToType,
   if (!CStyle && !ToQuals.compatiblyIncludes(FromQuals))
     return false;
 
-  // For a C-style cast, just require the address spaces to overlap.
-  // FIXME: Does "superset" also imply the representation of a pointer is the
-  // same? We're assuming that it does here and in compatiblyIncludes.
-  if (CStyle && !ToQuals.isAddressSpaceSupersetOf(FromQuals) &&
-      !FromQuals.isAddressSpaceSupersetOf(ToQuals))
+  // If address spaces mismatch:
+  //  - in top level it is only valid to convert to addr space that is a
+  //    superset in all cases apart from C-style casts where we allow
+  //    conversions between overlapping address spaces.
+  //  - in non-top levels it is not a valid conversion.
+  if (ToQuals.getAddressSpace() != FromQuals.getAddressSpace() &&
+      (!IsTopLevel ||
+       !(ToQuals.isAddressSpaceSupersetOf(FromQuals) ||
+         (CStyle && FromQuals.isAddressSpaceSupersetOf(ToQuals)))))
     return false;
 
   //   -- if the cv 1,j and cv 2,j are different, then const is in
@@ -3258,9 +3262,9 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
   bool PreviousToQualsIncludeConst = true;
   bool UnwrappedAnyPointer = false;
   while (Context.UnwrapSimilarTypes(FromType, ToType)) {
-    if (!isQualificationConversionStep(FromType, ToType, CStyle,
-                                       PreviousToQualsIncludeConst,
-                                       ObjCLifetimeConversion))
+    if (!isQualificationConversionStep(
+            FromType, ToType, CStyle, !UnwrappedAnyPointer,
+            PreviousToQualsIncludeConst, ObjCLifetimeConversion))
       return false;
     UnwrappedAnyPointer = true;
   }
@@ -4499,7 +4503,7 @@ Sema::CompareReferenceRelationship(SourceLocation Loc,
     // If we find a qualifier mismatch, the types are not reference-compatible,
     // but are still be reference-related if they're similar.
     bool ObjCLifetimeConversion = false;
-    if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false,
+    if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false, TopLevel,
                                        PreviousToQualsIncludeConst,
                                        ObjCLifetimeConversion))
       return (ConvertedReferent || Context.hasSimilarType(T1, T2))
@@ -6291,9 +6295,9 @@ void Sema::AddOverloadCandidate(
         return;
       }
 
-  if (Expr *RequiresClause = Function->getTrailingRequiresClause()) {
+  if (Function->getTrailingRequiresClause()) {
     ConstraintSatisfaction Satisfaction;
-    if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+    if (CheckFunctionConstraints(Function, Satisfaction) ||
         !Satisfaction.IsSatisfied) {
       Candidate.Viable = false;
       Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -6808,9 +6812,9 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
         return;
       }
 
-  if (Expr *RequiresClause = Method->getTrailingRequiresClause()) {
+  if (Method->getTrailingRequiresClause()) {
     ConstraintSatisfaction Satisfaction;
-    if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+    if (CheckFunctionConstraints(Method, Satisfaction) ||
         !Satisfaction.IsSatisfied) {
       Candidate.Viable = false;
       Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -7204,10 +7208,9 @@ void Sema::AddConversionCandidate(
     return;
   }
 
-  Expr *RequiresClause = Conversion->getTrailingRequiresClause();
-  if (RequiresClause) {
+  if (Conversion->getTrailingRequiresClause()) {
     ConstraintSatisfaction Satisfaction;
-    if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) ||
+    if (CheckFunctionConstraints(Conversion, Satisfaction) ||
         !Satisfaction.IsSatisfied) {
       Candidate.Viable = false;
       Candidate.FailureKind = ovl_fail_constraints_not_satisfied;
@@ -9270,17 +9273,31 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name,
       if (ExplicitTemplateArgs)
         continue;
 
-      AddOverloadCandidate(FD, FoundDecl, Args, CandidateSet,
-                           /*SuppressUserConversions=*/false, PartialOverloading,
-                           /*AllowExplicit*/ true,
-                           /*AllowExplicitConversions*/ false,
-                           ADLCallKind::UsesADL);
+      AddOverloadCandidate(
+          FD, FoundDecl, Args, CandidateSet, /*SuppressUserConversions=*/false,
+          PartialOverloading, /*AllowExplicit=*/true,
+          /*AllowExplicitConversions=*/false, ADLCallKind::UsesADL);
+      if (CandidateSet.getRewriteInfo().shouldAddReversed(Context, FD)) {
+        AddOverloadCandidate(
+            FD, FoundDecl, {Args[1], Args[0]}, CandidateSet,
+            /*SuppressUserConversions=*/false, PartialOverloading,
+            /*AllowExplicit=*/true, /*AllowExplicitConversions=*/false,
+            ADLCallKind::UsesADL, None, OverloadCandidateParamOrder::Reversed);
+      }
     } else {
+      auto *FTD = cast<FunctionTemplateDecl>(*I);
       AddTemplateOverloadCandidate(
-          cast<FunctionTemplateDecl>(*I), FoundDecl, ExplicitTemplateArgs, Args,
-          CandidateSet,
+          FTD, FoundDecl, ExplicitTemplateArgs, Args, CandidateSet,
           /*SuppressUserConversions=*/false, PartialOverloading,
-          /*AllowExplicit*/true, ADLCallKind::UsesADL);
+          /*AllowExplicit=*/true, ADLCallKind::UsesADL);
+      if (CandidateSet.getRewriteInfo().shouldAddReversed(
+              Context, FTD->getTemplatedDecl())) {
+        AddTemplateOverloadCandidate(
+            FTD, FoundDecl, ExplicitTemplateArgs, {Args[1], Args[0]},
+            CandidateSet, /*SuppressUserConversions=*/false, PartialOverloading,
+            /*AllowExplicit=*/true, ADLCallKind::UsesADL,
+            OverloadCandidateParamOrder::Reversed);
+      }
     }
   }
 }
@@ -9566,17 +9583,15 @@ bool clang::isBetterOverloadCandidate(
       if (RC1 && RC2) {
         bool AtLeastAsConstrained1, AtLeastAsConstrained2;
         if (S.IsAtLeastAsConstrained(Cand1.Function, {RC1}, Cand2.Function,
-                                     {RC2}, AtLeastAsConstrained1))
-          return false;
-        if (!AtLeastAsConstrained1)
-          return false;
-        if (S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function,
+                                     {RC2}, AtLeastAsConstrained1) ||
+            S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function,
                                      {RC1}, AtLeastAsConstrained2))
           return false;
-        if (!AtLeastAsConstrained2)
-          return true;
-      } else if (RC1 || RC2)
+        if (AtLeastAsConstrained1 != AtLeastAsConstrained2)
+          return AtLeastAsConstrained1;
+      } else if (RC1 || RC2) {
         return RC1 != nullptr;
+      }
     }
   }
 
@@ -9947,9 +9962,9 @@ static bool checkAddressOfFunctionIsAvailable(Sema &S, const FunctionDecl *FD,
     return false;
   }
 
-  if (const Expr *RC = FD->getTrailingRequiresClause()) {
+  if (FD->getTrailingRequiresClause()) {
     ConstraintSatisfaction Satisfaction;
-    if (S.CheckConstraintSatisfaction(RC, Satisfaction))
+    if (S.CheckFunctionConstraints(FD, Satisfaction, Loc))
       return false;
     if (!Satisfaction.IsSatisfied) {
       if (Complain) {
@@ -10974,8 +10989,7 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
         << (unsigned)FnKindPair.first << (unsigned)ocs_non_template
         << FnDesc /* Ignored */;
     ConstraintSatisfaction Satisfaction;
-    if (S.CheckConstraintSatisfaction(Fn->getTrailingRequiresClause(),
-                                      Satisfaction))
+    if (S.CheckFunctionConstraints(Fn, Satisfaction))
       break;
     S.DiagnoseUnsatisfiedConstraint(Satisfaction);
   }
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index f961244da072..ad4ea2d2593d 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2047,12 +2047,14 @@ private:
       if (const auto *TC = TTP->getTypeConstraint()) {
         TemplateArgumentListInfo TransformedArgs;
         const auto *ArgsAsWritten = TC->getTemplateArgsAsWritten();
-        if (SemaRef.Subst(ArgsAsWritten->getTemplateArgs(),
+        if (!ArgsAsWritten ||
+            SemaRef.Subst(ArgsAsWritten->getTemplateArgs(),
                           ArgsAsWritten->NumTemplateArgs, TransformedArgs,
                           Args))
           SemaRef.AttachTypeConstraint(
               TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
-              TC->getNamedConcept(), &TransformedArgs, NewTTP,
+              TC->getNamedConcept(), ArgsAsWritten ? &TransformedArgs : nullptr,
+              NewTTP,
               NewTTP->isParameterPack()
                  ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
                      ->getEllipsisLoc()
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 394c81c82794..6b865a601f9d 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2488,7 +2488,7 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
     case TemplateArgument::Template:
     case TemplateArgument::TemplateExpansion: {
       NestedNameSpecifierLocBuilder Builder;
-      TemplateName Template = Arg.getAsTemplate();
+      TemplateName Template = Arg.getAsTemplateOrTemplatePattern();
       if (DependentTemplateName *DTN = Template.getAsDependentTemplateName())
         Builder.MakeTrivial(Context, DTN->getQualifier(), Loc);
       else if (QualifiedTemplateName *QTN =
@@ -2514,27 +2514,10 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
 }
 
 TemplateArgumentLoc
-Sema::getIdentityTemplateArgumentLoc(Decl *TemplateParm,
+Sema::getIdentityTemplateArgumentLoc(NamedDecl *TemplateParm,
                                      SourceLocation Location) {
-  if (auto *TTP = dyn_cast<TemplateTypeParmDecl>(TemplateParm))
-    return getTrivialTemplateArgumentLoc(
-        TemplateArgument(
-            Context.getTemplateTypeParmType(TTP->getDepth(), TTP->getIndex(),
-                                            TTP->isParameterPack(), TTP)),
-        QualType(), Location.isValid() ? Location : TTP->getLocation());
-  else if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(TemplateParm))
-    return getTrivialTemplateArgumentLoc(TemplateArgument(TemplateName(TTP)),
-                                         QualType(),
-                                         Location.isValid() ? Location :
-                                         TTP->getLocation());
-  auto *NTTP = cast<NonTypeTemplateParmDecl>(TemplateParm);
-  CXXScopeSpec SS;
-  DeclarationNameInfo Info(NTTP->getDeclName(),
-                           Location.isValid() ? Location : NTTP->getLocation());
-  Expr *E = BuildDeclarationNameExpr(SS, Info, NTTP).get();
-  return getTrivialTemplateArgumentLoc(TemplateArgument(E), NTTP->getType(),
-                                       Location.isValid() ? Location :
-                                       NTTP->getLocation());
+  return getTrivialTemplateArgumentLoc(
+      Context.getInjectedTemplateArg(TemplateParm), QualType(), Location);
 }
 
 /// Convert the given deduced template argument and add it to the set of
@@ -3456,13 +3439,16 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
   //   ([temp.constr.decl]), those constraints are checked for satisfaction
   //   ([temp.constr.constr]). If the constraints are not satisfied, type
   //   deduction fails.
-  if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(),
-          Specialization, Builder, Info.AssociatedConstraintsSatisfaction))
-    return TDK_MiscellaneousDeductionFailure;
+  if (!PartialOverloading ||
+      (Builder.size() == FunctionTemplate->getTemplateParameters()->size())) {
+    if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(),
+            Specialization, Builder, Info.AssociatedConstraintsSatisfaction))
+      return TDK_MiscellaneousDeductionFailure;
 
-  if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) {
-    Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
-    return TDK_ConstraintsNotSatisfied;
+    if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) {
+      Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
+      return TDK_ConstraintsNotSatisfied;
+    }
   }
 
   if (OriginalCallArgs) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 39bc28d62305..568f5404dc0b 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
+#include "clang/AST/TypeVisitor.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/Stack.h"
 #include "clang/Sema/DeclSpec.h"
@@ -763,21 +764,30 @@ void Sema::PrintInstantiationStack() {
     
     case CodeSynthesisContext::ConstraintsCheck: {
       unsigned DiagID = 0;
+      if (!Active->Entity) {
+        Diags.Report(Active->PointOfInstantiation,
+                     diag::note_nested_requirement_here)
+          << Active->InstantiationRange;
+        break;
+      }
       if (isa<ConceptDecl>(Active->Entity))
         DiagID = diag::note_concept_specialization_here;
       else if (isa<TemplateDecl>(Active->Entity))
         DiagID = diag::note_checking_constraints_for_template_id_here;
       else if (isa<VarTemplatePartialSpecializationDecl>(Active->Entity))
         DiagID = diag::note_checking_constraints_for_var_spec_id_here;
-      else {
-        assert(isa<ClassTemplatePartialSpecializationDecl>(Active->Entity));
+      else if (isa<ClassTemplatePartialSpecializationDecl>(Active->Entity))
         DiagID = diag::note_checking_constraints_for_class_spec_id_here;
+      else {
+        assert(isa<FunctionDecl>(Active->Entity));
+        DiagID = diag::note_checking_constraints_for_function_here;
       }
       SmallVector<char, 128> TemplateArgsStr;
       llvm::raw_svector_ostream OS(TemplateArgsStr);
       cast<NamedDecl>(Active->Entity)->printName(OS);
-      printTemplateArgumentList(OS, Active->template_arguments(),
-                                getPrintingPolicy());
+      if (!isa<FunctionDecl>(Active->Entity))
+        printTemplateArgumentList(OS, Active->template_arguments(),
+                                  getPrintingPolicy());
       Diags.Report(Active->PointOfInstantiation, DiagID) << OS.str()
         << Active->InstantiationRange;
       break;
@@ -1048,6 +1058,8 @@ namespace {
                                             NonTypeTemplateParmDecl *D);
     ExprResult TransformSubstNonTypeTemplateParmPackExpr(
                                            SubstNonTypeTemplateParmPackExpr *E);
+    ExprResult TransformSubstNonTypeTemplateParmExpr(
+                                           SubstNonTypeTemplateParmExpr *E);
 
     /// Rebuild a DeclRefExpr for a VarDecl reference.
     ExprResult RebuildVarDeclRefExpr(VarDecl *PD, SourceLocation Loc);
@@ -1526,6 +1538,44 @@ TemplateInstantiator::TransformSubstNonTypeTemplateParmPackExpr(
                                          Arg);
 }
 
+ExprResult
+TemplateInstantiator::TransformSubstNonTypeTemplateParmExpr(
+                                          SubstNonTypeTemplateParmExpr *E) {
+  ExprResult SubstReplacement = TransformExpr(E->getReplacement());
+  if (SubstReplacement.isInvalid())
+    return true;
+  QualType SubstType = TransformType(E->getType());
+  if (SubstType.isNull())
+    return true;
+  // The type may have been previously dependent and not now, which means we
+  // might have to implicitly cast the argument to the new type, for example:
+  // template<auto T, decltype(T) U>
+  // concept C = sizeof(U) == 4;
+  // void foo() requires C<2, 'a'> { }
+  // When normalizing foo(), we first form the normalized constraints of C:
+  // AtomicExpr(sizeof(U) == 4,
+  //            U=SubstNonTypeTemplateParmExpr(Param=U,
+  //                                           Expr=DeclRef(U),
+  //                                           Type=decltype(T)))
+  // Then we substitute T = 2, U = 'a' into the parameter mapping, and need to
+  // produce:
+  // AtomicExpr(sizeof(U) == 4,
+  //            U=SubstNonTypeTemplateParmExpr(Param=U,
+  //                                           Expr=ImpCast(
+  //                                               decltype(2),
+  //                                               SubstNTTPE(Param=U, Expr='a',
+  //                                                          Type=char)),
+  //                                           Type=decltype(2)))
+  // The call to CheckTemplateArgument here produces the ImpCast.
+  TemplateArgument Converted;
+  if (SemaRef.CheckTemplateArgument(E->getParameter(), SubstType,
+                                    SubstReplacement.get(),
+                                    Converted).isInvalid())
+    return true;
+  return transformNonTypeTemplateParmRef(E->getParameter(),
+                                         E->getExprLoc(), Converted);
+}
+
 ExprResult TemplateInstantiator::RebuildVarDeclRefExpr(VarDecl *PD,
                                                        SourceLocation Loc) {
   DeclarationNameInfo NameInfo(PD->getDeclName(), Loc);
@@ -2096,6 +2146,94 @@ void Sema::SubstExceptionSpec(FunctionDecl *New, const FunctionProtoType *Proto,
   UpdateExceptionSpec(New, ESI);
 }
 
+namespace {
+
+  struct GetContainedInventedTypeParmVisitor :
+    public TypeVisitor<GetContainedInventedTypeParmVisitor,
+                       TemplateTypeParmDecl *> {
+    using TypeVisitor<GetContainedInventedTypeParmVisitor,
+                      TemplateTypeParmDecl *>::Visit;
+
+    TemplateTypeParmDecl *Visit(QualType T) {
+      if (T.isNull())
+        return nullptr;
+      return Visit(T.getTypePtr());
+    }
+    // The deduced type itself.
+    TemplateTypeParmDecl *VisitTemplateTypeParmType(
+        const TemplateTypeParmType *T) {
+      if (!T->getDecl()->isImplicit())
+        return nullptr;
+      return T->getDecl();
+    }
+
+    // Only these types can contain 'auto' types, and subsequently be replaced
+    // by references to invented parameters.
+
+    TemplateTypeParmDecl *VisitElaboratedType(const ElaboratedType *T) {
+      return Visit(T->getNamedType());
+    }
+
+    TemplateTypeParmDecl *VisitPointerType(const PointerType *T) {
+      return Visit(T->getPointeeType());
+    }
+
+    TemplateTypeParmDecl *VisitBlockPointerType(const BlockPointerType *T) {
+      return Visit(T->getPointeeType());
+    }
+
+    TemplateTypeParmDecl *VisitReferenceType(const ReferenceType *T) {
+      return Visit(T->getPointeeTypeAsWritten());
+    }
+
+    TemplateTypeParmDecl *VisitMemberPointerType(const MemberPointerType *T) {
+      return Visit(T->getPointeeType());
+    }
+
+    TemplateTypeParmDecl *VisitArrayType(const ArrayType *T) {
+      return Visit(T->getElementType());
+    }
+
+    TemplateTypeParmDecl *VisitDependentSizedExtVectorType(
+      const DependentSizedExtVectorType *T) {
+      return Visit(T->getElementType());
+    }
+
+    TemplateTypeParmDecl *VisitVectorType(const VectorType *T) {
+      return Visit(T->getElementType());
+    }
+
+    TemplateTypeParmDecl *VisitFunctionProtoType(const FunctionProtoType *T) {
+      return VisitFunctionType(T);
+    }
+
+    TemplateTypeParmDecl *VisitFunctionType(const FunctionType *T) {
+      return Visit(T->getReturnType());
+    }
+
+    TemplateTypeParmDecl *VisitParenType(const ParenType *T) {
+      return Visit(T->getInnerType());
+    }
+
+    TemplateTypeParmDecl *VisitAttributedType(const AttributedType *T) {
+      return Visit(T->getModifiedType());
+    }
+
+    TemplateTypeParmDecl *VisitMacroQualifiedType(const MacroQualifiedType *T) {
+      return Visit(T->getUnderlyingType());
+    }
+
+    TemplateTypeParmDecl *VisitAdjustedType(const AdjustedType *T) {
+      return Visit(T->getOriginalType());
+    }
+
+    TemplateTypeParmDecl *VisitPackExpansionType(const PackExpansionType *T) {
+      return Visit(T->getPattern());
+    }
+  };
+
+} // namespace
+
 ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
                             const MultiLevelTemplateArgumentList &TemplateArgs,
                                     int indexAdjustment,
@@ -2143,6 +2281,46 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
     return nullptr;
   }
 
+  // In abbreviated templates, TemplateTypeParmDecls with possible
+  // TypeConstraints are created when the parameter list is originally parsed.
+  // The TypeConstraints can therefore reference other function parameters in
+  // the abbreviated function template, which is why we must instantiate them
+  // here, when the instantiated versions of those referenced parameters are in
+  // scope.
+  if (TemplateTypeParmDecl *TTP =
+          GetContainedInventedTypeParmVisitor().Visit(OldDI->getType())) {
+    if (const TypeConstraint *TC = TTP->getTypeConstraint()) {
+      auto *Inst = cast_or_null<TemplateTypeParmDecl>(
+          FindInstantiatedDecl(TTP->getLocation(), TTP, TemplateArgs));
+      // We will first get here when instantiating the abbreviated function
+      // template's described function, but we might also get here later.
+      // Make sure we do not instantiate the TypeConstraint more than once.
+      if (Inst && !Inst->getTypeConstraint()) {
+        // TODO: Concepts: do not instantiate the constraint (delayed constraint
+        // substitution)
+        const ASTTemplateArgumentListInfo *TemplArgInfo
+          = TC->getTemplateArgsAsWritten();
+        TemplateArgumentListInfo InstArgs;
+
+        if (TemplArgInfo) {
+          InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
+          InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
+          if (Subst(TemplArgInfo->getTemplateArgs(),
+                    TemplArgInfo->NumTemplateArgs, InstArgs, TemplateArgs))
+            return nullptr;
+        }
+        if (AttachTypeConstraint(
+                TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
+                TC->getNamedConcept(), &InstArgs, Inst,
+                TTP->isParameterPack()
+                    ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
+                        ->getEllipsisLoc()
+                    : SourceLocation()))
+          return nullptr;
+      }
+    }
+  }
+
   ParmVarDecl *NewParm = CheckParameter(Context.getTranslationUnitDecl(),
                                         OldParm->getInnerLocStart(),
                                         OldParm->getLocation(),
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index fbbab8f00703..37dace3bee7f 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1837,6 +1837,23 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
     return nullptr;
   QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
 
+  if (TemplateParams && TemplateParams->size()) {
+    auto *LastParam =
+        dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
+    if (LastParam && LastParam->isImplicit() &&
+        LastParam->hasTypeConstraint()) {
+      // In abbreviated templates, the type-constraints of invented template
+      // type parameters are instantiated with the function type, invalidating
+      // the TemplateParameterList which relied on the template type parameter
+      // not having a type constraint. Recreate the TemplateParameterList with
+      // the updated parameter list.
+      TemplateParams = TemplateParameterList::Create(
+          SemaRef.Context, TemplateParams->getTemplateLoc(),
+          TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
+          TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
+    }
+  }
+
   NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
   if (QualifierLoc) {
     QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
@@ -2177,6 +2194,23 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
     return nullptr;
   QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);
 
+  if (TemplateParams && TemplateParams->size()) {
+    auto *LastParam =
+        dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
+    if (LastParam && LastParam->isImplicit() &&
+        LastParam->hasTypeConstraint()) {
+      // In abbreviated templates, the type-constraints of invented template
+      // type parameters are instantiated with the function type, invalidating
+      // the TemplateParameterList which relied on the template type parameter
+      // not having a type constraint. Recreate the TemplateParameterList with
+      // the updated parameter list.
+      TemplateParams = TemplateParameterList::Create(
+          SemaRef.Context, TemplateParams->getTemplateLoc(),
+          TemplateParams->getLAngleLoc(), TemplateParams->asArray(),
+          TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause());
+    }
+  }
+
   NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc();
   if (QualifierLoc) {
     QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc,
@@ -2190,6 +2224,9 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
   if (TrailingRequiresClause) {
     EnterExpressionEvaluationContext ConstantEvaluated(
         SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
+    auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(Owner);
+    Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext,
+                                     D->getMethodQualifiers(), ThisContext);
     ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause,
                                            TemplateArgs);
     if (SubstRC.isInvalid())
@@ -2522,28 +2559,34 @@ Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
   Inst->setAccess(AS_public);
   Inst->setImplicit(D->isImplicit());
   if (auto *TC = D->getTypeConstraint()) {
-    // TODO: Concepts: do not instantiate the constraint (delayed constraint
-    // substitution)
-    const ASTTemplateArgumentListInfo *TemplArgInfo
-      = TC->getTemplateArgsAsWritten();
-    TemplateArgumentListInfo InstArgs;
-
-    if (TemplArgInfo) {
-      InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
-      InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
-      if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
-                        TemplArgInfo->NumTemplateArgs,
-                        InstArgs, TemplateArgs))
+    if (!D->isImplicit()) {
+      // Invented template parameter type constraints will be instantiated with
+      // the corresponding auto-typed parameter as it might reference other
+      // parameters.
+
+      // TODO: Concepts: do not instantiate the constraint (delayed constraint
+      // substitution)
+      const ASTTemplateArgumentListInfo *TemplArgInfo
+        = TC->getTemplateArgsAsWritten();
+      TemplateArgumentListInfo InstArgs;
+
+      if (TemplArgInfo) {
+        InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
+        InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
+        if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
+                          TemplArgInfo->NumTemplateArgs,
+                          InstArgs, TemplateArgs))
+          return nullptr;
+      }
+      if (SemaRef.AttachTypeConstraint(
+              TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
+              TC->getNamedConcept(), &InstArgs, Inst,
+              D->isParameterPack()
+                  ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
+                      ->getEllipsisLoc()
+                  : SourceLocation()))
         return nullptr;
     }
-    if (SemaRef.AttachTypeConstraint(
-            TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
-            TC->getNamedConcept(), &InstArgs, Inst,
-            D->isParameterPack()
-                ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
-                    ->getEllipsisLoc()
-                : SourceLocation()))
-      return nullptr;
   }
   if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
     TypeSourceInfo *InstantiatedDefaultArg =
@@ -4246,24 +4289,29 @@ bool Sema::CheckInstantiatedFunctionTemplateConstraints(
   Sema::ContextRAII savedContext(*this, Decl);
   LocalInstantiationScope Scope(*this);
 
-  MultiLevelTemplateArgumentList MLTAL =
-    getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true);
-
   // If this is not an explicit specialization - we need to get the instantiated
   // version of the template arguments and add them to scope for the
   // substitution.
   if (Decl->isTemplateInstantiation()) {
     InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(),
         InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(),
-        MLTAL.getInnermost(), SourceRange());
+        TemplateArgs, SourceRange());
     if (Inst.isInvalid())
       return true;
+    MultiLevelTemplateArgumentList MLTAL(
+        *Decl->getTemplateSpecializationArgs());
     if (addInstantiatedParametersToScope(
             *this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(),
             Scope, MLTAL))
       return true;
   }
-
+  Qualifiers ThisQuals;
+  CXXRecordDecl *Record = nullptr;
+  if (auto *Method = dyn_cast<CXXMethodDecl>(Decl)) {
+    ThisQuals = Method->getMethodQualifiers();
+    Record = Method->getParent();
+  }
+  CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
   return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs,
                                      PointOfInstantiation, Satisfaction);
 }
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 093b69ab19d0..362b5a564ab9 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -555,7 +555,7 @@ void ASTDeclReader::Visit(Decl *D) {
 
 void ASTDeclReader::VisitDecl(Decl *D) {
   if (D->isTemplateParameter() || D->isTemplateParameterPack() ||
-      isa<ParmVarDecl>(D)) {
+      isa<ParmVarDecl>(D) || isa<ObjCTypeParamDecl>(D)) {
     // We don't want to deserialize the DeclContext of a template
     // parameter or of a parameter of a function template immediately.   These
     // entities might be used in the formulation of its DeclContext (for
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index a4918d7179ff..002b6070ddcd 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -607,10 +607,17 @@ window.addEventListener("keydown", function (event) {
 )<<<";
 }
 
+static bool shouldDisplayPopUpRange(const SourceRange &Range) {
+  return !(Range.getBegin().isMacroID() || Range.getEnd().isMacroID());
+}
+
 static void
 HandlePopUpPieceStartTag(Rewriter &R,
                          const std::vector<SourceRange> &PopUpRanges) {
   for (const auto &Range : PopUpRanges) {
+    if (!shouldDisplayPopUpRange(Range))
+      continue;
+
     html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "",
                          "<table class='variable_popup'><tbody>",
                          /*IsTokenRange=*/true);
@@ -626,6 +633,8 @@ static void HandlePopUpPieceEndTag(Rewriter &R,
   llvm::raw_svector_ostream Out(Buf);
 
   SourceRange Range(Piece.getLocation().asRange());
+  if (!shouldDisplayPopUpRange(Range))
+    return;
 
   // Write out the path indices with a right arrow and the message as a row.
   Out << "<tr><td valign='top'><div class='PathIndex PathIndexPopUp'>"
@@ -870,7 +879,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
          << (num - 1)
          << "\" title=\"Previous event ("
          << (num - 1)
-         << ")\">&#x2190;</a></div></td>";
+         << ")\">&#x2190;</a></div>";
     }
 
     os << "</td><td>";
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index b551e9f4cf82..6d1a67f2a4fa 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
@@ -69,7 +70,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message,
   // We cannot recover from llvm errors.  When reporting a fatal error, exit
   // with status 70 to generate crash diagnostics.  For BSD systems this is
   // defined as an internal software error.  Otherwise, exit with status 1.
-  exit(GenCrashDiag ? 70 : 1);
+  llvm::sys::Process::Exit(GenCrashDiag ? 70 : 1);
 }
 
 #ifdef CLANG_HAVE_RLIMITS
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index 53c8a9d642dc..e1041f91bfd5 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -547,7 +548,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message,
   Diags.Report(diag::err_fe_error_backend) << Message;
 
   // We cannot recover from llvm errors.
-  exit(1);
+  sys::Process::Exit(1);
 }
 
 int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
index aa29536d8616..91584914d868 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
@@ -23,9 +23,12 @@
 #include <errno.h>
 #include <libkern/OSAtomic.h>
 #include <objc/objc-sync.h>
-#include <os/lock.h>
 #include <sys/ucontext.h>
 
+#if defined(__has_include) && __has_include(<os/lock.h>)
+#include <os/lock.h>
+#endif
+
 #if defined(__has_include) && __has_include(<xpc/xpc.h>)
 #include <xpc/xpc.h>
 #endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
@@ -247,6 +250,8 @@ TSAN_INTERCEPTOR(void, os_lock_unlock, void *lock) {
   REAL(os_lock_unlock)(lock);
 }
 
+#if defined(__has_include) && __has_include(<os/lock.h>)
+
 TSAN_INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) {
   if (!cur_thread()->is_inited || cur_thread()->is_dead) {
     return REAL(os_unfair_lock_lock)(lock);
@@ -286,6 +291,8 @@ TSAN_INTERCEPTOR(void, os_unfair_lock_unlock, os_unfair_lock_t lock) {
   REAL(os_unfair_lock_unlock)(lock);
 }
 
+#endif  // #if defined(__has_include) && __has_include(<os/lock.h>)
+
 #if defined(__has_include) && __has_include(<xpc/xpc.h>)
 
 TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler,
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 8f48f16c2364..ccce227f4d6b 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -342,6 +342,10 @@
 #    define _LIBCPP_HAS_ALIGNED_ALLOC
 #    define _LIBCPP_HAS_QUICK_EXIT
 #    define _LIBCPP_HAS_C11_FEATURES
+#    if __FreeBSD_version >= 1300064 || \
+       (__FreeBSD_version >= 1201504 && __FreeBSD_version < 1300000)
+#      define _LIBCPP_HAS_TIMESPEC_GET
+#    endif
 #  elif defined(__BIONIC__)
 #    define _LIBCPP_HAS_C11_FEATURES
 #    if __ANDROID_API__ >= 21
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index de1023346aa5..08cae59b294b 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -275,8 +275,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_PLT32:
   case R_ARM_JUMP24:
     // Source is ARM, all PLT entries are ARM so no interworking required.
-    // Otherwise we need to interwork if Symbol has bit 0 set (Thumb).
-    if (expr == R_PC && ((s.getVA() & 1) == 1))
+    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
+    if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
       return true;
     LLVM_FALLTHROUGH;
   case R_ARM_CALL: {
@@ -286,8 +286,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
     // Source is Thumb, all PLT entries are ARM so interworking is required.
-    // Otherwise we need to interwork if Symbol has bit 0 clear (ARM).
-    if (expr == R_PLT_PC || ((s.getVA() & 1) == 0))
+    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
+    if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
       return true;
     LLVM_FALLTHROUGH;
   case R_ARM_THM_CALL: {
diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index 1d4e80184dcd..b3cc78710e9a 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -67,6 +67,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) {
 }
 
 void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
+  // Create canonical PLT entries for non-PIE code. Compilers don't generate
+  // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE.
+  uint32_t glink = in.plt->getVA(); // VA of .glink
+  if (!config->isPic) {
+    for (const Symbol *sym : in.plt->entries)
+      if (sym->needsPltAddr) {
+        writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0);
+        buf += 16;
+        glink += 16;
+      }
+  }
+
   // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an
   // absolute address from a specific .plt slot (usually called .got.plt on
   // other targets) and jumps there.
@@ -85,15 +97,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
   // computes the PLT index (by computing the distance from the landing b to
   // itself) and calls _dl_runtime_resolve() (in glibc).
   uint32_t got = in.got->getVA();
-  uint32_t glink = in.plt->getVA(); // VA of .glink
   const uint8_t *end = buf + 64;
   if (config->isPic) {
-    uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12;
+    uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12;
     uint32_t gotBcl = got + 4 - (glink + afterBcl);
     write32(buf + 0, 0x3d6b0000 | ha(afterBcl));  // addis r11,r11,1f-glink@ha
     write32(buf + 4, 0x7c0802a6);                 // mflr r0
     write32(buf + 8, 0x429f0005);                 // bcl 20,30,.+4
-    write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l
+    write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l
     write32(buf + 16, 0x7d8802a6);                // mflr r12
     write32(buf + 20, 0x7c0803a6);                // mtlr r0
     write32(buf + 24, 0x7d6c5850);                // sub r11,r11,r12
@@ -113,16 +124,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
     buf += 56;
   } else {
     write32(buf + 0, 0x3d800000 | ha(got + 4));   // lis     r12,GOT+4@ha
-    write32(buf + 4, 0x3d6b0000 | ha(-glink));    // addis   r11,r11,-Glink@ha
+    write32(buf + 4, 0x3d6b0000 | ha(-glink));    // addis   r11,r11,-glink@ha
     if (ha(got + 4) == ha(got + 8))
       write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12)
     else
       write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12)
-    write32(buf + 12, 0x396b0000 | lo(-glink));   // addi    r11,r11,-Glink@l
+    write32(buf + 12, 0x396b0000 | lo(-glink));   // addi    r11,r11,-glink@l
     write32(buf + 16, 0x7c0903a6);                // mtctr   r0
     write32(buf + 20, 0x7c0b5a14);                // add     r0,r11,r11
     if (ha(got + 4) == ha(got + 8))
-      write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12)
+      write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12)
     else
       write32(buf + 24, 0x818c0000 | 4);          // lwz r12,4(r12)
     write32(buf + 28, 0x7d605a14);                // add     r11,r0,r11
@@ -146,7 +157,7 @@ PPC::PPC() {
   gotBaseSymInGotPlt = false;
   gotHeaderEntriesNum = 3;
   gotPltHeaderEntriesNum = 0;
-  pltHeaderSize = 64; // size of PLTresolve in .glink
+  pltHeaderSize = 0;
   pltEntrySize = 4;
   ipltEntrySize = 16;
 
@@ -178,25 +189,25 @@ void PPC::writeGotHeader(uint8_t *buf) const {
 
 void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const {
   // Address of the symbol resolver stub in .glink .
-  write32(buf, in.plt->getVA() + 4 * s.pltIndex);
+  write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex);
 }
 
 bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file,
-                     uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const {
-  if (type != R_PPC_REL24 && type != R_PPC_PLTREL24)
+                     uint64_t branchAddr, const Symbol &s, int64_t a) const {
+  if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24)
     return false;
   if (s.isInPlt())
     return true;
   if (s.isUndefWeak())
     return false;
-  return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA()));
+  return !PPC::inBranchRange(type, branchAddr, s.getVA(a));
 }
 
 uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; }
 
 bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
   uint64_t offset = dst - src;
-  if (type == R_PPC_REL24 || type == R_PPC_PLTREL24)
+  if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24)
     return isInt<26>(offset);
   llvm_unreachable("unsupported relocation type used in branch");
 }
@@ -219,13 +230,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s,
     return R_DTPREL;
   case R_PPC_REL14:
   case R_PPC_REL32:
-  case R_PPC_LOCAL24PC:
   case R_PPC_REL16_LO:
   case R_PPC_REL16_HI:
   case R_PPC_REL16_HA:
     return R_PC;
   case R_PPC_GOT16:
     return R_GOT_OFF;
+  case R_PPC_LOCAL24PC:
   case R_PPC_REL24:
     return R_PLT_PC;
   case R_PPC_PLTREL24:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index aab272f53a73..147c51ab285e 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -485,6 +485,14 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) {
         p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr;
       else if (config->relocatable && type != target->noneRel)
         sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym});
+    } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 &&
+               p->r_addend >= 0x8000) {
+      // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24
+      // indicates that r30 is relative to the input section .got2
+      // (r_addend>=0x8000), after linking, r30 should be relative to the output
+      // section .got2 . To compensate for the shift, adjust r_addend by
+      // ppc32Got2OutSecOff.
+      p->r_addend += sec->file->ppc32Got2OutSecOff;
     }
   }
 }
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index ced9991f2003..93ec06610716 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1198,10 +1198,16 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
                     getLocation(sec, sym, offset));
       if (!sym.isInPlt())
         addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym);
-      if (!sym.isDefined())
+      if (!sym.isDefined()) {
         replaceWithDefined(
             sym, in.plt,
             target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0);
+        if (config->emachine == EM_PPC) {
+          // PPC32 canonical PLT entries are at the beginning of .glink
+          cast<Defined>(sym).value = in.plt->headerSize;
+          in.plt->headerSize += 16;
+        }
+      }
       sym.needsPltAddr = true;
       sec.relocations.push_back({expr, type, offset, addend, &sym});
       return;
@@ -1298,10 +1304,10 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
     if (expr == R_GOT_PC && !isAbsoluteValue(sym)) {
       expr = target->adjustRelaxExpr(type, relocatedAddr, expr);
     } else {
-      // Addend of R_PPC_PLTREL24 is used to choose call stub type. It should be
-      // ignored if optimized to R_PC.
+      // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call
+      // stub type. It should be ignored if optimized to R_PC.
       if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL)
-        addend = 0;
+        addend &= ~0x8000;
       expr = fromPlt(expr);
     }
   }
@@ -1752,6 +1758,37 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
                                             uint64_t off) {
   auto *ts = make<ThunkSection>(os, off);
   ts->partition = os->partition;
+  if ((config->fixCortexA53Errata843419 || config->fixCortexA8) &&
+      !isd->sections.empty()) {
+    // The errata fixes are sensitive to addresses modulo 4 KiB. When we add
+    // thunks we disturb the base addresses of sections placed after the thunks
+    // this makes patches we have generated redundant, and may cause us to
+    // generate more patches as different instructions are now in sensitive
+    // locations. When we generate more patches we may force more branches to
+    // go out of range, causing more thunks to be generated. In pathological
+    // cases this can cause the address dependent content pass not to converge.
+    // We fix this by rounding up the size of the ThunkSection to 4KiB, this
+    // limits the insertion of a ThunkSection on the addresses modulo 4 KiB,
+    // which means that adding Thunks to the section does not invalidate
+    // errata patches for following code.
+    // Rounding up the size to 4KiB has consequences for code-size and can
+    // trip up linker script defined assertions. For example the linux kernel
+    // has an assertion that what LLD represents as an InputSectionDescription
+    // does not exceed 4 KiB even if the overall OutputSection is > 128 MiB.
+    // We use the heuristic of rounding up the size when both of the following
+    // conditions are true:
+    // 1.) The OutputSection is larger than the ThunkSectionSpacing. This
+    //     accounts for the case where no single InputSectionDescription is
+    //     larger than the OutputSection size. This is conservative but simple.
+    // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent
+    //     any assertion failures that an InputSectionDescription is < 4 KiB
+    //     in size.
+    uint64_t isdSize = isd->sections.back()->outSecOff +
+                       isd->sections.back()->getSize() -
+                       isd->sections.front()->outSecOff;
+    if (os->size > target->getThunkSectionSpacing() && isdSize > 4096)
+      ts->roundUpSizeForErrata = true;
+  }
   isd->thunkSections.push_back({ts, pass});
   return ts;
 }
@@ -1820,9 +1857,7 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
                               rel.sym->getVA(rel.addend) + getPCBias(rel.type)))
       return true;
     rel.sym = &t->destination;
-    // TODO Restore addend on all targets.
-    if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64)
-      rel.addend = t->addend;
+    rel.addend = t->addend;
     if (rel.sym->isInPlt())
       rel.expr = toPlt(rel.expr);
   }
@@ -1900,16 +1935,11 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
             rel.sym = t->getThunkTargetSym();
             rel.expr = fromPlt(rel.expr);
 
-            // On AArch64 and PPC64, a jump/call relocation may be encoded as
+            // On AArch64 and PPC, a jump/call relocation may be encoded as
             // STT_SECTION + non-zero addend, clear the addend after
             // redirection.
-            //
-            // The addend of R_PPC_PLTREL24 should be ignored after changing to
-            // R_PC.
-            if (config->emachine == EM_AARCH64 ||
-                config->emachine == EM_PPC64 ||
-                (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24))
-              rel.addend = 0;
+            if (config->emachine != EM_MIPS)
+              rel.addend = -getPCBias(rel.type);
           }
 
         for (auto &p : isd->thunkSections)
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 550a5b38b89b..ea6eab4b47ad 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2449,6 +2449,9 @@ PltSection::PltSection()
   if (config->emachine == EM_PPC || config->emachine == EM_PPC64) {
     name = ".glink";
     alignment = 4;
+    // PLTresolve is at the end.
+    if (config->emachine == EM_PPC)
+      footerSize = 64;
   }
 
   // On x86 when IBT is enabled, this section contains the second PLT (lazy
@@ -2486,7 +2489,7 @@ void PltSection::addEntry(Symbol &sym) {
 }
 
 size_t PltSection::getSize() const {
-  return headerSize + entries.size() * target->pltEntrySize;
+  return headerSize + entries.size() * target->pltEntrySize + footerSize;
 }
 
 bool PltSection::isNeeded() const {
@@ -3451,19 +3454,14 @@ bool ARMExidxSyntheticSection::classof(const SectionBase *d) {
 }
 
 ThunkSection::ThunkSection(OutputSection *os, uint64_t off)
-    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
-                       config->wordsize, ".text.thunk") {
+    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
+                       ".text.thunk") {
   this->parent = os;
   this->outSecOff = off;
 }
 
-// When the errata patching is on, we round the size up to a 4 KiB
-// boundary. This limits the effect that adding Thunks has on the addresses
-// of the program modulo 4 KiB. As the errata patching is sensitive to address
-// modulo 4 KiB this can prevent further patches from being needed due to
-// Thunk insertion.
 size_t ThunkSection::getSize() const {
-  if (config->fixCortexA53Errata843419 || config->fixCortexA8)
+  if (roundUpSizeForErrata)
     return alignTo(size, 4096);
   return size;
 }
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index f0a598dda51d..5f59178fb541 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -683,9 +683,9 @@ public:
   void addEntry(Symbol &sym);
   size_t getNumEntries() const { return entries.size(); }
 
-  size_t headerSize = 0;
+  size_t headerSize;
+  size_t footerSize = 0;
 
-private:
   std::vector<const Symbol *> entries;
 };
 
@@ -1069,6 +1069,10 @@ public:
   InputSection *getTargetInputSection() const;
   bool assignOffsets();
 
+  // When true, round up reported size of section to 4 KiB. See comment
+  // in addThunkSection() for more details.
+  bool roundUpSizeForErrata = false;
+
 private:
   std::vector<Thunk *> thunks;
   size_t size = 0;
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 7b927a434e36..f9c2e2d74e0a 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -245,8 +245,7 @@ public:
   // decide the offsets in the call stub.
   PPC32PltCallStub(const InputSection &isec, const Relocation &rel,
                    Symbol &dest)
-      : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0),
-        file(isec.file) {}
+      : Thunk(dest, rel.addend), file(isec.file) {}
   uint32_t size() override { return 16; }
   void writeTo(uint8_t *buf) override;
   void addSymbols(ThunkSection &isec) override;
@@ -257,6 +256,14 @@ private:
   const InputFile *file;
 };
 
+class PPC32LongThunk final : public Thunk {
+public:
+  PPC32LongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
+  uint32_t size() override { return config->isPic ? 32 : 16; }
+  void writeTo(uint8_t *buf) override;
+  void addSymbols(ThunkSection &isec) override;
+};
+
 // PPC64 Plt call stubs.
 // Any call site that needs to call through a plt entry needs a call stub in
 // the .text section. The call stub is responsible for:
@@ -765,6 +772,33 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
   return !config->isPic || (isec.file == file && rel.addend == addend);
 }
 
+void PPC32LongThunk::addSymbols(ThunkSection &isec) {
+  addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0,
+            isec);
+}
+
+void PPC32LongThunk::writeTo(uint8_t *buf) {
+  auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000) >> 16; };
+  auto lo = [](uint32_t v) -> uint16_t { return v; };
+  uint32_t d = destination.getVA(addend);
+  if (config->isPic) {
+    uint32_t off = d - (getThunkTargetSym()->getVA() + 8);
+    write32(buf + 0, 0x7c0802a6);            // mflr r0
+    write32(buf + 4, 0x429f0005);            // bcl 20,31,.+4
+    write32(buf + 8, 0x7d8802a6);            // mflr r12
+    write32(buf + 12, 0x3d8c0000 | ha(off)); // addis r12,r12,off@ha
+    write32(buf + 16, 0x398c0000 | lo(off)); // addi r12,r12,off@l
+    write32(buf + 20, 0x7c0803a6);           // mtlr r0
+    buf += 24;
+  } else {
+    write32(buf + 0, 0x3d800000 | ha(d));    // lis r12,d@ha
+    write32(buf + 4, 0x398c0000 | lo(d));    // addi r12,r12,d@l
+    buf += 8;
+  }
+  write32(buf + 0, 0x7d8903a6);              // mtctr r12
+  write32(buf + 4, 0x4e800420);              // bctr
+}
+
 void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) {
   uint16_t offHa = (offset + 0x8000) >> 16;
   uint16_t offLo = offset & 0xffff;
@@ -902,9 +936,12 @@ static Thunk *addThunkMips(RelType type, Symbol &s) {
 
 static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel,
                             Symbol &s) {
-  assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) &&
+  assert((rel.type == R_PPC_LOCAL24PC || rel.type == R_PPC_REL24 ||
+          rel.type == R_PPC_PLTREL24) &&
          "unexpected relocation type for thunk");
-  return make<PPC32PltCallStub>(isec, rel, s);
+  if (s.isInPlt())
+    return make<PPC32PltCallStub>(isec, rel, s);
+  return make<PPC32LongThunk>(s, rel.addend);
 }
 
 static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index bc16417646c3..4e55f93882f1 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -30,6 +30,14 @@ ELF Improvements
   with GNU now. (`r375051
   <https://github.com/llvm/llvm-project/commit/48993d5ab9413f0e5b94dfa292a233ce55b09e3e>`_)
 
+* New ``elf32btsmipn32_fbsd`` and ``elf32ltsmipn32_fbsd`` emulations
+  are supported.
+
+* Relax MIPS ``jalr`` and ``jr`` instructions marked by the ``R_MIPS_JALR``
+  relocation.
+
+* Reduced size of linked MIPS binaries.
+
 COFF Improvements
 -----------------
 
@@ -38,7 +46,33 @@ COFF Improvements
 MinGW Improvements
 ------------------
 
-* ...
+* Allow using custom .edata sections from input object files (for use
+  by Wine)
+  (`dadc6f248868 <https://reviews.llvm.org/rGdadc6f248868>`_)
+
+* Don't implicitly create import libraries unless requested
+  (`6540e55067e3 <https://reviews.llvm.org/rG6540e55067e3>`_)
+
+* Support merging multiple resource object files
+  (`3d3a9b3b413d <https://reviews.llvm.org/rG3d3a9b3b413d>`_)
+  and properly handle the default manifest object files that GCC can pass
+  (`d581dd501381 <https://reviews.llvm.org/rGd581dd501381>`_)
+
+* Demangle itanium symbol names in warnings/error messages
+  (`a66fc1c99f3e <https://reviews.llvm.org/rGa66fc1c99f3e>`_)
+
+* Print source locations for undefined references and duplicate symbols,
+  if possible
+  (`1d06d48bb346 <https://reviews.llvm.org/rG1d06d48bb346>`_)
+  and
+  (`b38f577c015c <https://reviews.llvm.org/rGb38f577c015c>`_)
+
+* Look for more filename patterns when resolving ``-l`` options
+  (`0226c35262df <https://reviews.llvm.org/rG0226c35262df>`_)
+
+* Don't error out on duplicate absolute symbols with the same value
+  (which can happen for the default-null symbol for weak symbols)
+  (`1737cc750c46 <https://reviews.llvm.org/rG1737cc750c46>`_)
 
 MachO Improvements
 ------------------
diff --git a/lldb/source/DataFormatters/FormatCache.cpp b/lldb/source/DataFormatters/FormatCache.cpp
index 231e7ed0c0a0..99f140705446 100644
--- a/lldb/source/DataFormatters/FormatCache.cpp
+++ b/lldb/source/DataFormatters/FormatCache.cpp
@@ -69,6 +69,8 @@ FormatCache::Entry &FormatCache::GetEntry(ConstString type) {
   return m_map[type];
 }
 
+namespace lldb_private {
+
 template<> bool FormatCache::Entry::IsCached<lldb::TypeFormatImplSP>() {
   return IsFormatCached();
 }
@@ -79,6 +81,8 @@ template<> bool FormatCache::Entry::IsCached<lldb::SyntheticChildrenSP>() {
   return IsSyntheticCached();
 }
 
+} // namespace lldb_private
+
 template <typename ImplSP>
 bool FormatCache::Get(ConstString type, ImplSP &format_impl_sp) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
diff --git a/lldb/source/DataFormatters/LanguageCategory.cpp b/lldb/source/DataFormatters/LanguageCategory.cpp
index e18ec0feaa8b..daf8c7af7d1a 100644
--- a/lldb/source/DataFormatters/LanguageCategory.cpp
+++ b/lldb/source/DataFormatters/LanguageCategory.cpp
@@ -55,6 +55,8 @@ bool LanguageCategory::Get(FormattersMatchData &match_data,
   return result;
 }
 
+namespace lldb_private {
+
 /// Explicit instantiations for the three types.
 /// \{
 template bool
@@ -83,6 +85,8 @@ auto &LanguageCategory::GetHardcodedFinder<lldb::SyntheticChildrenSP>() {
   return m_hardcoded_synthetics;
 }
 
+} // namespace lldb_private
+
 template <typename ImplSP>
 bool LanguageCategory::GetHardcoded(FormatManager &fmt_mgr,
                                     FormattersMatchData &match_data,
diff --git a/lldb/source/Interpreter/CommandAlias.cpp b/lldb/source/Interpreter/CommandAlias.cpp
index 5139c53a47b3..5209a7bcbc4e 100644
--- a/lldb/source/Interpreter/CommandAlias.cpp
+++ b/lldb/source/Interpreter/CommandAlias.cpp
@@ -65,7 +65,8 @@ static bool ProcessAliasOptionsArgs(lldb::CommandObjectSP &cmd_obj_sp,
     else {
       for (auto &entry : args.entries()) {
         if (!entry.ref().empty())
-          option_arg_vector->emplace_back("<argument>", -1, entry.ref());
+          option_arg_vector->emplace_back(std::string("<argument>"), -1,
+                                          std::string(entry.ref()));
       }
     }
   }
diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp
index 0bceea14269d..80e9d3a6fc15 100644
--- a/lldb/source/Interpreter/Options.cpp
+++ b/lldb/source/Interpreter/Options.cpp
@@ -1061,8 +1061,8 @@ llvm::Expected<Args> Options::ParseAlias(const Args &args,
     }
     if (!option_arg)
       option_arg = "<no-argument>";
-    option_arg_vector->emplace_back(option_str.GetString(), has_arg,
-                                    option_arg);
+    option_arg_vector->emplace_back(std::string(option_str.GetString()),
+                                    has_arg, std::string(option_arg));
 
     // Find option in the argument list; also see if it was supposed to take an
     // argument and if one was supplied.  Remove option (and argument, if
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 232063a6f339..6166aa77bda4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -85,35 +85,6 @@ static bool DeclKindIsCXXClass(clang::Decl::Kind decl_kind) {
   return false;
 }
 
-struct BitfieldInfo {
-  uint64_t bit_size;
-  uint64_t bit_offset;
-
-  BitfieldInfo()
-      : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {}
-
-  void Clear() {
-    bit_size = LLDB_INVALID_ADDRESS;
-    bit_offset = LLDB_INVALID_ADDRESS;
-  }
-
-  bool IsValid() const {
-    return (bit_size != LLDB_INVALID_ADDRESS) &&
-           (bit_offset != LLDB_INVALID_ADDRESS);
-  }
-
-  bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const {
-    if (IsValid()) {
-      // This bitfield info is valid, so any subsequent bitfields must not
-      // overlap and must be at a higher bit offset than any previous bitfield
-      // + size.
-      return (bit_size + bit_offset) <= next_bit_offset;
-    } else {
-      // If the this BitfieldInfo is not valid, then any offset isOK
-      return true;
-    }
-  }
-};
 
 ClangASTImporter &DWARFASTParserClang::GetClangASTImporter() {
   if (!m_clang_ast_importer_up) {
@@ -2419,7 +2390,7 @@ void DWARFASTParserClang::ParseSingleMember(
     lldb::AccessType &default_accessibility,
     DelayedPropertyList &delayed_properties,
     lldb_private::ClangASTImporter::LayoutInfo &layout_info,
-    BitfieldInfo &last_field_info) {
+    FieldInfo &last_field_info) {
   ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
   const dw_tag_t tag = die.Tag();
   // Get the parent byte size so we can verify any members will fit
@@ -2453,6 +2424,14 @@ void DWARFASTParserClang::ParseSingleMember(
       const dw_attr_t attr = attributes.AttributeAtIndex(i);
       DWARFFormValue form_value;
       if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+        // DW_AT_data_member_location indicates the byte offset of the
+        // word from the base address of the structure.
+        //
+        // DW_AT_bit_offset indicates how many bits into the word
+        // (according to the host endianness) the low-order bit of the
+        // field starts.  DW_AT_bit_offset can be negative.
+        //
+        // DW_AT_bit_size indicates the size of the field in bits.
         switch (attr) {
         case DW_AT_name:
           name = form_value.AsCString();
@@ -2603,36 +2582,24 @@ void DWARFASTParserClang::ParseSingleMember(
       Type *member_type = die.ResolveTypeUID(encoding_form.Reference());
 
       clang::FieldDecl *field_decl = nullptr;
+      const uint64_t character_width = 8;
+      const uint64_t word_width = 32;
       if (tag == DW_TAG_member) {
         if (member_type) {
+          CompilerType member_clang_type = member_type->GetLayoutCompilerType();
+
           if (accessibility == eAccessNone)
             accessibility = default_accessibility;
           member_accessibilities.push_back(accessibility);
 
           uint64_t field_bit_offset =
               (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8));
-          if (bit_size > 0) {
 
-            BitfieldInfo this_field_info;
+          if (bit_size > 0) {
+            FieldInfo this_field_info;
             this_field_info.bit_offset = field_bit_offset;
             this_field_info.bit_size = bit_size;
 
-            /////////////////////////////////////////////////////////////
-            // How to locate a field given the DWARF debug information
-            //
-            // AT_byte_size indicates the size of the word in which the bit
-            // offset must be interpreted.
-            //
-            // AT_data_member_location indicates the byte offset of the
-            // word from the base address of the structure.
-            //
-            // AT_bit_offset indicates how many bits into the word
-            // (according to the host endianness) the low-order bit of the
-            // field starts.  AT_bit_offset can be negative.
-            //
-            // AT_bit_size indicates the size of the field in bits.
-            /////////////////////////////////////////////////////////////
-
             if (data_bit_offset != UINT64_MAX) {
               this_field_info.bit_offset = data_bit_offset;
             } else {
@@ -2649,8 +2616,9 @@ void DWARFASTParserClang::ParseSingleMember(
             }
 
             if ((this_field_info.bit_offset >= parent_bit_size) ||
-                !last_field_info.NextBitfieldOffsetIsValid(
-                    this_field_info.bit_offset)) {
+                (last_field_info.IsBitfield() &&
+                 !last_field_info.NextBitfieldOffsetIsValid(
+                     this_field_info.bit_offset))) {
               ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
               objfile->GetModule()->ReportWarning(
                   "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
@@ -2659,40 +2627,12 @@ void DWARFASTParserClang::ParseSingleMember(
                   "compiler and include the preprocessed output for %s\n",
                   die.GetID(), DW_TAG_value_to_name(tag), name,
                   this_field_info.bit_offset, GetUnitName(parent_die).c_str());
-              this_field_info.Clear();
               return;
             }
 
             // Update the field bit offset we will report for layout
             field_bit_offset = this_field_info.bit_offset;
 
-            // If the member to be emitted did not start on a character
-            // boundary and there is empty space between the last field and
-            // this one, then we need to emit an anonymous member filling
-            // up the space up to its start.  There are three cases here:
-            //
-            // 1 If the previous member ended on a character boundary, then
-            // we can emit an
-            //   anonymous member starting at the most recent character
-            //   boundary.
-            //
-            // 2 If the previous member did not end on a character boundary
-            // and the distance
-            //   from the end of the previous member to the current member
-            //   is less than a
-            //   word width, then we can emit an anonymous member starting
-            //   right after the
-            //   previous member and right before this member.
-            //
-            // 3 If the previous member did not end on a character boundary
-            // and the distance
-            //   from the end of the previous member to the current member
-            //   is greater than
-            //   or equal a word width, then we act as in Case 1.
-
-            const uint64_t character_width = 8;
-            const uint64_t word_width = 32;
-
             // Objective-C has invalid DW_AT_bit_offset values in older
             // versions of clang, so we have to be careful and only insert
             // unnamed bitfields if we have a new enough clang.
@@ -2704,53 +2644,57 @@ void DWARFASTParserClang::ParseSingleMember(
                   die.GetCU()->Supports_unnamed_objc_bitfields();
 
             if (detect_unnamed_bitfields) {
-              BitfieldInfo anon_field_info;
-
-              if ((this_field_info.bit_offset % character_width) !=
-                  0) // not char aligned
-              {
-                uint64_t last_field_end = 0;
-
-                if (last_field_info.IsValid())
-                  last_field_end =
-                      last_field_info.bit_offset + last_field_info.bit_size;
-
-                if (this_field_info.bit_offset != last_field_end) {
-                  if (((last_field_end % character_width) == 0) || // case 1
-                      (this_field_info.bit_offset - last_field_end >=
-                       word_width)) // case 3
-                  {
-                    anon_field_info.bit_size =
-                        this_field_info.bit_offset % character_width;
-                    anon_field_info.bit_offset =
-                        this_field_info.bit_offset - anon_field_info.bit_size;
-                  } else // case 2
-                  {
-                    anon_field_info.bit_size =
-                        this_field_info.bit_offset - last_field_end;
-                    anon_field_info.bit_offset = last_field_end;
-                  }
-                }
+              clang::Optional<FieldInfo> unnamed_field_info;
+              uint64_t last_field_end = 0;
+
+              last_field_end =
+                  last_field_info.bit_offset + last_field_info.bit_size;
+
+              if (!last_field_info.IsBitfield()) {
+                // The last field was not a bit-field...
+                // but if it did not take up the entire word then we need to
+                // extend last_field_end so the bit-field does not step into the
+                // last field's padding.
+                if (last_field_end != 0 && ((last_field_end % word_width) != 0))
+                  last_field_end += word_width - (last_field_end % word_width);
               }
 
-              if (anon_field_info.IsValid()) {
+              // If we have a gap between the last_field_end and the current
+              // field we have an unnamed bit-field
+              if (this_field_info.bit_offset != last_field_end &&
+                  !(this_field_info.bit_offset < last_field_end)) {
+                unnamed_field_info = FieldInfo{};
+                unnamed_field_info->bit_size =
+                    this_field_info.bit_offset - last_field_end;
+                unnamed_field_info->bit_offset = last_field_end;
+              }
+
+              if (unnamed_field_info) {
                 clang::FieldDecl *unnamed_bitfield_decl =
                     ClangASTContext::AddFieldToRecordType(
                         class_clang_type, llvm::StringRef(),
                         m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
                                                                   word_width),
-                        accessibility, anon_field_info.bit_size);
+                        accessibility, unnamed_field_info->bit_size);
 
                 layout_info.field_offsets.insert(std::make_pair(
-                    unnamed_bitfield_decl, anon_field_info.bit_offset));
+                    unnamed_bitfield_decl, unnamed_field_info->bit_offset));
               }
             }
+
             last_field_info = this_field_info;
+            last_field_info.SetIsBitfield(true);
           } else {
-            last_field_info.Clear();
+            last_field_info.bit_offset = field_bit_offset;
+
+            if (llvm::Optional<uint64_t> clang_type_size =
+                    member_clang_type.GetByteSize(nullptr)) {
+              last_field_info.bit_size = *clang_type_size * character_width;
+            }
+
+            last_field_info.SetIsBitfield(false);
           }
 
-          CompilerType member_clang_type = member_type->GetLayoutCompilerType();
           if (!member_clang_type.IsCompleteType())
             member_clang_type.GetCompleteType();
 
@@ -2885,7 +2829,7 @@ bool DWARFASTParserClang::ParseChildMembers(
   if (!parent_die)
     return false;
 
-  BitfieldInfo last_field_info;
+  FieldInfo last_field_info;
 
   ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
   ClangASTContext *ast =
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 4ad757247c3e..8a78299c8b10 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -170,33 +170,20 @@ protected:
   lldb::ModuleSP GetModuleForType(const DWARFDIE &die);
 
 private:
-  struct BitfieldInfo {
-    uint64_t bit_size;
-    uint64_t bit_offset;
+  struct FieldInfo {
+    uint64_t bit_size = 0;
+    uint64_t bit_offset = 0;
+    bool is_bitfield = false;
 
-    BitfieldInfo()
-        : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {}
+    FieldInfo() = default;
 
-    void Clear() {
-      bit_size = LLDB_INVALID_ADDRESS;
-      bit_offset = LLDB_INVALID_ADDRESS;
-    }
-
-    bool IsValid() const {
-      return (bit_size != LLDB_INVALID_ADDRESS) &&
-             (bit_offset != LLDB_INVALID_ADDRESS);
-    }
+    void SetIsBitfield(bool flag) { is_bitfield = flag; }
+    bool IsBitfield() { return is_bitfield; }
 
     bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const {
-      if (IsValid()) {
-        // This bitfield info is valid, so any subsequent bitfields must not
-        // overlap and must be at a higher bit offset than any previous bitfield
-        // + size.
-        return (bit_size + bit_offset) <= next_bit_offset;
-      } else {
-        // If the this BitfieldInfo is not valid, then any offset isOK
-        return true;
-      }
+      // Any subsequent bitfields must not overlap and must be at a higher
+      // bit offset than any previous bitfield + size.
+      return (bit_size + bit_offset) <= next_bit_offset;
     }
   };
 
@@ -208,7 +195,7 @@ private:
                     lldb::AccessType &default_accessibility,
                     DelayedPropertyList &delayed_properties,
                     lldb_private::ClangASTImporter::LayoutInfo &layout_info,
-                    BitfieldInfo &last_field_info);
+                    FieldInfo &last_field_info);
 
   bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type,
                           lldb_private::CompilerType &clang_type);
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 9bfaaccd953e..f06d18720c3a 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -77,7 +77,8 @@ namespace llvm {
     static constexpr size_t strLen(const char *Str) {
 #if __cplusplus > 201402L
       return std::char_traits<char>::length(Str);
-#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || defined(_MSC_VER)
+#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
+    (defined(_MSC_VER) && _MSC_VER >= 1916)
       return __builtin_strlen(Str);
 #else
       const char *Begin = Str;
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index a860ce2773e1..c710c5d7055c 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -135,7 +135,6 @@ public:
   MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
 
 private:
-  MCSymbol *CurrentFnBegin = nullptr;
   MCSymbol *CurrentFnEnd = nullptr;
   MCSymbol *CurExceptionSym = nullptr;
 
@@ -148,6 +147,8 @@ private:
   static char ID;
 
 protected:
+  MCSymbol *CurrentFnBegin = nullptr;
+
   /// Protected struct HandlerInfo and Handlers permit target extended
   /// AsmPrinter adds their own handlers.
   struct HandlerInfo {
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index eb6d84e8cbb4..218afda1b546 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -13,7 +13,9 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
 #define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
 
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
 #include <memory>
 
 namespace llvm {
@@ -28,24 +30,31 @@ namespace orc {
 
 class JITTargetMachineBuilder;
 
+IRMaterializationUnit::ManglingOptions
+irManglingOptionsFromTargetOptions(const TargetOptions &Opts);
+
 /// Simple compile functor: Takes a single IR module and returns an ObjectFile.
 /// This compiler supports a single compilation thread and LLVMContext only.
 /// For multithreaded compilation, use ConcurrentIRCompiler below.
-class SimpleCompiler {
+class SimpleCompiler : public IRCompileLayer::IRCompiler {
 public:
   using CompileResult = std::unique_ptr<MemoryBuffer>;
 
   /// Construct a simple compile functor with the given target.
   SimpleCompiler(TargetMachine &TM, ObjectCache *ObjCache = nullptr)
-    : TM(TM), ObjCache(ObjCache) {}
+      : IRCompiler(irManglingOptionsFromTargetOptions(TM.Options)), TM(TM),
+        ObjCache(ObjCache) {}
 
   /// Set an ObjectCache to query before compiling.
   void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; }
 
   /// Compile a Module to an ObjectFile.
-  CompileResult operator()(Module &M);
+  Expected<CompileResult> operator()(Module &M) override;
 
 private:
+  IRMaterializationUnit::ManglingOptions
+  manglingOptionsForTargetMachine(const TargetMachine &TM);
+
   CompileResult tryToLoadFromObjectCache(const Module &M);
   void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer);
 
@@ -73,14 +82,14 @@ private:
 ///
 /// This class creates a new TargetMachine and SimpleCompiler instance for each
 /// compile.
-class ConcurrentIRCompiler {
+class ConcurrentIRCompiler : public IRCompileLayer::IRCompiler {
 public:
   ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
                        ObjectCache *ObjCache = nullptr);
 
   void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
 
-  std::unique_ptr<MemoryBuffer> operator()(Module &M);
+  Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override;
 
 private:
   JITTargetMachineBuilder JTMB;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index d0a9ca5c0580..ecba454887b3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -489,13 +489,18 @@ public:
   /// is guaranteed to return Error::success() and can be wrapped with cantFail.
   Error notifyEmitted();
 
-  /// Adds new symbols to the JITDylib and this responsibility instance.
-  ///        JITDylib entries start out in the materializing state.
+  /// Attempt to claim responsibility for new definitions. This method can be
+  /// used to claim responsibility for symbols that are added to a
+  /// materialization unit during the compilation process (e.g. literal pool
+  /// symbols). Symbol linkage rules are the same as for symbols that are
+  /// defined up front: duplicate strong definitions will result in errors.
+  /// Duplicate weak definitions will be discarded (in which case they will
+  /// not be added to this responsibility instance).
   ///
   ///   This method can be used by materialization units that want to add
   /// additional symbols at materialization time (e.g. stubs, compile
   /// callbacks, metadata).
-  Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
+  Error defineMaterializing(SymbolFlagsMap SymbolFlags);
 
   /// Notify all not-yet-emitted covered by this MaterializationResponsibility
   /// instance that an error has occurred.
@@ -1023,7 +1028,7 @@ private:
                                        const SymbolStringPtr &DependantName,
                                        MaterializingInfo &EmittedMI);
 
-  Error defineMaterializing(const SymbolFlagsMap &SymbolFlags);
+  Expected<SymbolFlagsMap> defineMaterializing(SymbolFlagsMap SymbolFlags);
 
   void replace(std::unique_ptr<MaterializationUnit> MU);
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 52223a83ad42..bb8270fe80a3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -29,14 +29,29 @@ namespace orc {
 
 class IRCompileLayer : public IRLayer {
 public:
-  using CompileFunction =
-      std::function<Expected<std::unique_ptr<MemoryBuffer>>(Module &)>;
+  class IRCompiler {
+  public:
+    IRCompiler(IRMaterializationUnit::ManglingOptions MO) : MO(std::move(MO)) {}
+    virtual ~IRCompiler();
+    const IRMaterializationUnit::ManglingOptions &getManglingOptions() const {
+      return MO;
+    }
+    virtual Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) = 0;
+
+  protected:
+    IRMaterializationUnit::ManglingOptions &manglingOptions() { return MO; }
+
+  private:
+    IRMaterializationUnit::ManglingOptions MO;
+  };
 
   using NotifyCompiledFunction =
       std::function<void(VModuleKey K, ThreadSafeModule TSM)>;
 
   IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
-                 CompileFunction Compile);
+                 std::unique_ptr<IRCompiler> Compile);
+
+  IRCompiler &getCompiler() { return *Compile; }
 
   void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled);
 
@@ -45,7 +60,8 @@ public:
 private:
   mutable std::mutex IRLayerMutex;
   ObjectLayer &BaseLayer;
-  CompileFunction Compile;
+  std::unique_ptr<IRCompiler> Compile;
+  const IRMaterializationUnit::ManglingOptions *ManglingOpts;
   NotifyCompiledFunction NotifyCompiled = NotifyCompiledFunction();
 };
 
@@ -90,7 +106,10 @@ public:
   /// Compile the module, and add the resulting object to the base layer
   ///        along with the given memory manager and symbol resolver.
   Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
-    if (auto Err = BaseLayer.addObject(std::move(K), Compile(*M)))
+    auto Obj = Compile(*M);
+    if (!Obj)
+      return Obj.takeError();
+    if (auto Err = BaseLayer.addObject(std::move(K), std::move(*Obj)))
       return Err;
     if (NotifyCompiled)
       NotifyCompiled(std::move(K), std::move(M));
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index c048ff3d5522..8e4760024aa8 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -124,7 +124,7 @@ protected:
   static std::unique_ptr<ObjectLayer>
   createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
 
-  static Expected<IRCompileLayer::CompileFunction>
+  static Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>
   createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB);
 
   /// Create an LLJIT instance with a single compile thread.
@@ -192,7 +192,7 @@ public:
       ExecutionSession &, const Triple &TT)>;
 
   using CompileFunctionCreator =
-      std::function<Expected<IRCompileLayer::CompileFunction>(
+      std::function<Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>(
           JITTargetMachineBuilder JTMB)>;
 
   std::unique_ptr<ExecutionSession> ES;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
index 8f9bd704395e..95e32b2431a0 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -21,15 +21,62 @@
 namespace llvm {
 namespace orc {
 
+/// IRMaterializationUnit is a convenient base class for MaterializationUnits
+/// wrapping LLVM IR. Represents materialization responsibility for all symbols
+/// in the given module. If symbols are overridden by other definitions, then
+/// their linkage is changed to available-externally.
+class IRMaterializationUnit : public MaterializationUnit {
+public:
+  struct ManglingOptions {
+    bool EmulatedTLS = false;
+  };
+
+  using SymbolNameToDefinitionMap = std::map<SymbolStringPtr, GlobalValue *>;
+
+  /// Create an IRMaterializationUnit. Scans the module to build the
+  /// SymbolFlags and SymbolToDefinition maps.
+  IRMaterializationUnit(ExecutionSession &ES, const ManglingOptions &MO,
+                        ThreadSafeModule TSM, VModuleKey K);
+
+  /// Create an IRMaterializationUnit from a module, and pre-existing
+  /// SymbolFlags and SymbolToDefinition maps. The maps must provide
+  /// entries for each definition in M.
+  /// This constructor is useful for delegating work from one
+  /// IRMaterializationUnit to another.
+  IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
+                        SymbolFlagsMap SymbolFlags,
+                        SymbolNameToDefinitionMap SymbolToDefinition);
+
+  /// Return the ModuleIdentifier as the name for this MaterializationUnit.
+  StringRef getName() const override;
+
+  const ThreadSafeModule &getModule() const { return TSM; }
+
+protected:
+  ThreadSafeModule TSM;
+  SymbolNameToDefinitionMap SymbolToDefinition;
+
+private:
+  void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+};
+
 /// Interface for layers that accept LLVM IR.
 class IRLayer {
 public:
-  IRLayer(ExecutionSession &ES);
+  IRLayer(ExecutionSession &ES,
+          const IRMaterializationUnit::ManglingOptions *&MO)
+      : ES(ES), MO(MO) {}
+
   virtual ~IRLayer();
 
   /// Returns the ExecutionSession for this layer.
   ExecutionSession &getExecutionSession() { return ES; }
 
+  /// Get the mangling options for this layer.
+  const IRMaterializationUnit::ManglingOptions *&getManglingOptions() const {
+    return MO;
+  }
+
   /// Sets the CloneToNewContextOnEmit flag (false by default).
   ///
   /// When set, IR modules added to this layer will be cloned on to a new
@@ -57,49 +104,15 @@ public:
 private:
   bool CloneToNewContextOnEmit = false;
   ExecutionSession &ES;
-};
-
-/// IRMaterializationUnit is a convenient base class for MaterializationUnits
-/// wrapping LLVM IR. Represents materialization responsibility for all symbols
-/// in the given module. If symbols are overridden by other definitions, then
-/// their linkage is changed to available-externally.
-class IRMaterializationUnit : public MaterializationUnit {
-public:
-  using SymbolNameToDefinitionMap = std::map<SymbolStringPtr, GlobalValue *>;
-
-  /// Create an IRMaterializationLayer. Scans the module to build the
-  /// SymbolFlags and SymbolToDefinition maps.
-  IRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
-                        VModuleKey K);
-
-  /// Create an IRMaterializationLayer from a module, and pre-existing
-  /// SymbolFlags and SymbolToDefinition maps. The maps must provide
-  /// entries for each definition in M.
-  /// This constructor is useful for delegating work from one
-  /// IRMaterializationUnit to another.
-  IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K,
-                        SymbolFlagsMap SymbolFlags,
-                        SymbolNameToDefinitionMap SymbolToDefinition);
-
-  /// Return the ModuleIdentifier as the name for this MaterializationUnit.
-  StringRef getName() const override;
-
-  const ThreadSafeModule &getModule() const { return TSM; }
-
-protected:
-  ThreadSafeModule TSM;
-  SymbolNameToDefinitionMap SymbolToDefinition;
-
-private:
-  void discard(const JITDylib &JD, const SymbolStringPtr &Name) override;
+  const IRMaterializationUnit::ManglingOptions *&MO;
 };
 
 /// MaterializationUnit that materializes modules by calling the 'emit' method
 /// on the given IRLayer.
 class BasicIRLayerMaterializationUnit : public IRMaterializationUnit {
 public:
-  BasicIRLayerMaterializationUnit(IRLayer &L, VModuleKey K,
-                                  ThreadSafeModule TSM);
+  BasicIRLayerMaterializationUnit(IRLayer &L, const ManglingOptions &MO,
+                                  ThreadSafeModule TSM, VModuleKey K);
 
 private:
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index f6b86bb23167..97a3dc365457 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -182,8 +182,8 @@ public:
   IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
                      Speculator &Spec, MangleAndInterner &Mangle,
                      ResultEval Interpreter)
-      : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle),
-        QueryAnalysis(Interpreter) {}
+      : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer),
+        S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {}
 
   void emit(MaterializationResponsibility R, ThreadSafeModule TSM);
 
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 9522c4742244..61a1bd405a4d 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -99,7 +99,8 @@ public:
 
   /// Explicitly trigger a crash recovery in the current process, and
   /// return failure from RunSafely(). This function does not return.
-  void HandleCrash();
+  LLVM_ATTRIBUTE_NORETURN
+  void HandleExit(int RetCode);
 
   /// In case of a crash, this is the crash identifier.
   int RetCode = 0;
diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h
index 67e37912519b..e934b7413c17 100644
--- a/llvm/include/llvm/Support/Process.h
+++ b/llvm/include/llvm/Support/Process.h
@@ -201,6 +201,12 @@ public:
   /// Get the result of a process wide random number generator. The
   /// generator will be automatically seeded in non-deterministic fashion.
   static unsigned GetRandomNumber();
+
+  /// Equivalent to ::exit(), except when running inside a CrashRecoveryContext.
+  /// In that case, the control flow will resume after RunSafely(), like for a
+  /// crash, rather than exiting the current process.
+  LLVM_ATTRIBUTE_NORETURN
+  static void Exit(int RetCode);
 };
 
 }
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 63ff00afc2ae..ababa1d61f66 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -62,6 +62,8 @@ public:
   typedef std::function<void(const PassManagerBuilder &Builder,
                              legacy::PassManagerBase &PM)>
       ExtensionFn;
+  typedef int GlobalExtensionID;
+
   enum ExtensionPointTy {
     /// EP_EarlyAsPossible - This extension point allows adding passes before
     /// any other transformations, allowing them to see the code as it is coming
@@ -193,7 +195,17 @@ public:
   /// Adds an extension that will be used by all PassManagerBuilder instances.
   /// This is intended to be used by plugins, to register a set of
   /// optimisations to run automatically.
-  static void addGlobalExtension(ExtensionPointTy Ty, ExtensionFn Fn);
+  ///
+  /// \returns A global extension identifier that can be used to remove the
+  /// extension.
+  static GlobalExtensionID addGlobalExtension(ExtensionPointTy Ty,
+                                              ExtensionFn Fn);
+  /// Removes an extension that was previously added using addGlobalExtension.
+  /// This is also intended to be used by plugins, to remove any extension that
+  /// was previously registered before being unloaded.
+  ///
+  /// \param ExtensionID Identifier of the extension to be removed.
+  static void removeGlobalExtension(GlobalExtensionID ExtensionID);
   void addExtension(ExtensionPointTy Ty, ExtensionFn Fn);
 
 private:
@@ -222,10 +234,20 @@ public:
 /// used by optimizer plugins to allow all front ends to transparently use
 /// them.  Create a static instance of this class in your plugin, providing a
 /// private function that the PassManagerBuilder can use to add your passes.
-struct RegisterStandardPasses {
+class RegisterStandardPasses {
+  PassManagerBuilder::GlobalExtensionID ExtensionID;
+
+public:
   RegisterStandardPasses(PassManagerBuilder::ExtensionPointTy Ty,
                          PassManagerBuilder::ExtensionFn Fn) {
-    PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn));
+    ExtensionID = PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn));
+  }
+
+  ~RegisterStandardPasses() {
+    // If the collection holding the global extensions is destroyed after the
+    // plugin is unloaded, the extension has to be removed here. Indeed, the
+    // destructor of the ExtensionFn may reference code in the plugin.
+    PassManagerBuilder::removeGlobalExtension(ExtensionID);
   }
 };
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3516f4a7b370..20cd9da31fbd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -709,15 +709,21 @@ void AsmPrinter::EmitFunctionHeader() {
   // Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily
   // place prefix data before NOPs.
   unsigned PatchableFunctionPrefix = 0;
+  unsigned PatchableFunctionEntry = 0;
   (void)F.getFnAttribute("patchable-function-prefix")
       .getValueAsString()
       .getAsInteger(10, PatchableFunctionPrefix);
+  (void)F.getFnAttribute("patchable-function-entry")
+      .getValueAsString()
+      .getAsInteger(10, PatchableFunctionEntry);
   if (PatchableFunctionPrefix) {
     CurrentPatchableFunctionEntrySym =
         OutContext.createLinkerPrivateTempSymbol();
     OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
     emitNops(PatchableFunctionPrefix);
-  } else {
+  } else if (PatchableFunctionEntry) {
+    // May be reassigned when emitting the body, to reference the label after
+    // the initial BTI (AArch64) or endbr32/endbr64 (x86).
     CurrentPatchableFunctionEntrySym = CurrentFnBegin;
   }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 38011102c7b3..e97bcd62e8c7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -968,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
     addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
                MachineLocation(CallReg));
   } else {
-    DIE *CalleeDIE = getDIE(CalleeSP);
-    assert(CalleeDIE && "Could not find DIE for call site entry origin");
+    DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+    assert(CalleeDIE && "Could not create DIE for call site entry origin");
     addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
                 *CalleeDIE);
   }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fa6800de7955..6e643ad26410 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -540,14 +540,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
   }
 }
 
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
-  DICompileUnit *Unit = SP->getUnit();
-  assert(SP->isDefinition() && "Subprogram not a definition");
-  assert(Unit && "Subprogram definition without parent unit");
-  auto &CU = getOrCreateDwarfCompileUnit(Unit);
-  return *CU.getOrCreateSubprogramDIE(SP);
-}
-
 /// Try to interpret values loaded into registers that forward parameters
 /// for \p CallMI. Store parameters with interpreted value into \p Params.
 static void collectCallSiteParameters(const MachineInstr *CallMI,
@@ -758,17 +750,6 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
         if (!CalleeDecl || !CalleeDecl->getSubprogram())
           continue;
         CalleeSP = CalleeDecl->getSubprogram();
-
-        if (CalleeSP->isDefinition()) {
-          // Ensure that a subprogram DIE for the callee is available in the
-          // appropriate CU.
-          constructSubprogramDefinitionDIE(CalleeSP);
-        } else {
-          // Create the declaration DIE if it is missing. This is required to
-          // support compilation of old bitcode with an incomplete list of
-          // retained metadata.
-          CU.getOrCreateSubprogramDIE(CalleeSP);
-        }
       }
 
       // TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
@@ -924,6 +905,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
     NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
   }
 
+  // Create DIEs for function declarations used for call site debug info.
+  for (auto Scope : DIUnit->getRetainedTypes())
+    if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+      NewCU.getOrCreateSubprogramDIE(SP);
+
   CUMap.insert({DIUnit, &NewCU});
   CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
   return NewCU;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index fd82b1f98055..f90dd48458ea 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -442,9 +442,6 @@ class DwarfDebug : public DebugHandlerBase {
   /// Construct a DIE for this abstract scope.
   void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
 
-  /// Construct a DIE for the subprogram definition \p SP and return it.
-  DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
   /// Construct DIEs for call site entries describing the calls in \p MF.
   void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
                                   DIE &ScopeDIE, const MachineFunction &MF);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 1aba956c48de..53747aef77fd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -188,9 +188,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
 
 /// Check whether the DIE for this MDNode can be shared across CUs.
 bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
-  // When the MDNode can be part of the type system (this includes subprogram
-  // declarations *and* subprogram definitions, even local definitions), the
-  // DIE must be shared across CUs.
+  // When the MDNode can be part of the type system, the DIE can be shared
+  // across CUs.
   // Combining type units and cross-CU DIE sharing is lower value (since
   // cross-CU DIE sharing is used in LTO and removes type redundancy at that
   // level already) but may be implementable for some value in projects
@@ -198,7 +197,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
   // together.
   if (isDwoUnit() && !DD->shareAcrossDWOCUs())
     return false;
-  return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+  return (isa<DIType>(D) ||
+          (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+         !DD->generateTypeUnits();
 }
 
 DIE *DwarfUnit::getDIE(const DINode *D) const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 003db39fe5f9..7d77664fbf69 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6857,12 +6857,20 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
     Value *Addr = Builder.CreateBitCast(
         SI.getOperand(1),
         SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
-    if ((IsLE && Upper) || (!IsLE && !Upper))
+    const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
+    if (IsOffsetStore)
       Addr = Builder.CreateGEP(
           SplitStoreType, Addr,
           ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+    MaybeAlign Alignment(SI.getAlignment());
+    if (IsOffsetStore && Alignment) {
+      // When splitting the store in half, naturally one half will retain the
+      // alignment of the original wider store, regardless of whether it was
+      // over-aligned or not, while the other will require adjustment.
+      Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
+    }
     Builder.CreateAlignedStore(
-        V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
+        V, Addr, Alignment.hasValue() ? Alignment.getValue().value() : 0);
   };
 
   CreateSplitStore(LValue, false);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 17eca2b0301c..96e794b15a44 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1385,7 +1385,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
       // help debugging.  Probably the optimizer should not do this.
-      MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression());
+      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
     } else if (const auto *CI = dyn_cast<Constant>(V)) {
       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
     } else {
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 67d9dacda61b..3f6622723bdc 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -107,13 +107,9 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
   assert(
       cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
       "Expected inlined-at fields to agree");
-  // DBG_VALUE insts now carry IR-level indirection in their DIExpression
-  // rather than encoding it in the instruction itself.
-  const DIExpression *DIExpr = cast<DIExpression>(Expr);
-  DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
   return insertInstr(BuildMI(getMF(), getDL(),
                              getTII().get(TargetOpcode::DBG_VALUE),
-                             /*IsIndirect*/ false, Reg, Variable, DIExpr));
+                             /*IsIndirect*/ true, Reg, Variable, Expr));
 }
 
 MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -124,15 +120,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
   assert(
       cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
       "Expected inlined-at fields to agree");
-  // DBG_VALUE insts now carry IR-level indirection in their DIExpression
-  // rather than encoding it in the instruction itself.
-  const DIExpression *DIExpr = cast<DIExpression>(Expr);
-  DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
   return buildInstr(TargetOpcode::DBG_VALUE)
       .addFrameIndex(FI)
-      .addReg(0)
+      .addImm(0)
       .addMetadata(Variable)
-      .addMetadata(DIExpr);
+      .addMetadata(Expr);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -156,7 +148,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
     MIB.addReg(0U);
   }
 
-  return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr);
+  return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
 }
 
 MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 5870e20d4227..6e5593abb43e 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -524,6 +524,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
     for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
       GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
       std::string Name = Globals[k]->getName();
+      GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility();
       GlobalValue::DLLStorageClassTypes DLLStorage =
           Globals[k]->getDLLStorageClass();
 
@@ -549,6 +550,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
       if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
         GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace,
                                               Linkage, Name, GEP, &M);
+        GA->setVisibility(Visibility);
         GA->setDLLStorageClass(DLLStorage);
       }
 
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 2cc547a6b741..5b20a2482b7b 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -100,27 +100,28 @@ enum : unsigned { UndefLocNo = ~0U };
 /// usage of the location.
 class DbgValueLocation {
 public:
-  DbgValueLocation(unsigned LocNo)
-      : LocNo(LocNo) {
+  DbgValueLocation(unsigned LocNo, bool WasIndirect)
+      : LocNo(LocNo), WasIndirect(WasIndirect) {
     static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
     assert(locNo() == LocNo && "location truncation");
   }
 
-  DbgValueLocation() : LocNo(0) {}
+  DbgValueLocation() : LocNo(0), WasIndirect(0) {}
 
   unsigned locNo() const {
     // Fix up the undef location number, which gets truncated.
     return LocNo == INT_MAX ? UndefLocNo : LocNo;
   }
+  bool wasIndirect() const { return WasIndirect; }
   bool isUndef() const { return locNo() == UndefLocNo; }
 
   DbgValueLocation changeLocNo(unsigned NewLocNo) const {
-    return DbgValueLocation(NewLocNo);
+    return DbgValueLocation(NewLocNo, WasIndirect);
   }
 
   friend inline bool operator==(const DbgValueLocation &LHS,
                                 const DbgValueLocation &RHS) {
-    return LHS.LocNo == RHS.LocNo;
+    return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
   }
 
   friend inline bool operator!=(const DbgValueLocation &LHS,
@@ -129,7 +130,8 @@ public:
   }
 
 private:
-  unsigned LocNo;
+  unsigned LocNo : 31;
+  unsigned WasIndirect : 1;
 };
 
 /// Map of where a user value is live, and its location.
@@ -166,6 +168,10 @@ class UserValue {
   /// Map of slot indices where this value is live.
   LocMap locInts;
 
+  /// Set of interval start indexes that have been trimmed to the
+  /// lexical scope.
+  SmallSet<SlotIndex, 2> trimmedDefs;
+
   /// Insert a DBG_VALUE into MBB at Idx for LocNo.
   void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
                         SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -279,8 +285,8 @@ public:
   void mapVirtRegs(LDVImpl *LDV);
 
   /// Add a definition point to this value.
-  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
-    DbgValueLocation Loc(getLocationNo(LocMO));
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
+    DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
     // Add a singular (Idx,Idx) -> Loc mapping.
     LocMap::iterator I = locInts.find(Idx);
     if (!I.valid() || I.start() != Idx)
@@ -315,10 +321,11 @@ public:
   ///
   /// \param LI Scan for copies of the value in LI->reg.
   /// \param LocNo Location number of LI->reg.
+  /// \param WasIndirect Indicates if the original use of LI->reg was indirect
   /// \param Kills Points where the range of LocNo could be extended.
   /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
   void addDefsFromCopies(
-      LiveInterval *LI, unsigned LocNo,
+      LiveInterval *LI, unsigned LocNo, bool WasIndirect,
       const SmallVectorImpl<SlotIndex> &Kills,
       SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
       MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -538,6 +545,8 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
       OS << "undef";
     else {
       OS << I.value().locNo();
+      if (I.value().wasIndirect())
+        OS << " ind";
     }
   }
   for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -646,18 +655,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   }
 
   // Get or create the UserValue for (variable,offset) here.
-  assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before "
-                                      "LiveDebugVariables");
+  bool IsIndirect = MI.getOperand(1).isImm();
+  if (IsIndirect)
+    assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
   const DILocalVariable *Var = MI.getDebugVariable();
   const DIExpression *Expr = MI.getDebugExpression();
   UserValue *UV =
       getUserValue(Var, Expr, MI.getDebugLoc());
   if (!Discard)
-    UV->addDef(Idx, MI.getOperand(0));
+    UV->addDef(Idx, MI.getOperand(0), IsIndirect);
   else {
     MachineOperand MO = MachineOperand::CreateReg(0U, false);
     MO.setIsDebug();
-    UV->addDef(Idx, MO);
+    UV->addDef(Idx, MO, false);
   }
   return true;
 }
@@ -765,7 +775,7 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
 }
 
 void UserValue::addDefsFromCopies(
-    LiveInterval *LI, unsigned LocNo,
+    LiveInterval *LI, unsigned LocNo, bool WasIndirect,
     const SmallVectorImpl<SlotIndex> &Kills,
     SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
     MachineRegisterInfo &MRI, LiveIntervals &LIS) {
@@ -829,7 +839,7 @@ void UserValue::addDefsFromCopies(
       MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
       assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
       unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
-      DbgValueLocation NewLoc(LocNo);
+      DbgValueLocation NewLoc(LocNo, WasIndirect);
       I.insert(Idx, Idx.getNextSlot(), NewLoc);
       NewDefs.push_back(std::make_pair(Idx, NewLoc));
       break;
@@ -877,7 +887,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
       // sub-register in that regclass). For now, simply skip handling copies if
       // a sub-register is involved.
       if (LI && !LocMO.getSubReg())
-        addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS);
+        addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
+                          LIS);
       continue;
     }
 
@@ -910,6 +921,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
     SlotIndex RStart = LIS.getInstructionIndex(*Range.first);
     SlotIndex REnd = LIS.getInstructionIndex(*Range.second);
 
+    // Variable locations at the first instruction of a block should be
+    // based on the block's SlotIndex, not the first instruction's index.
+    if (Range.first == Range.first->getParent()->begin())
+      RStart = LIS.getSlotIndexes()->getIndexBefore(*Range.first);
+
     // At the start of each iteration I has been advanced so that
     // I.stop() >= PrevEnd. Check for overlap.
     if (PrevEnd && I.start() < PrevEnd) {
@@ -922,7 +938,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
       ++I;
 
       // If the interval also overlaps the start of the "next" (i.e.
-      // current) range create a new interval for the remainder
+      // current) range create a new interval for the remainder (which
+      // may be further trimmed).
       if (RStart < IStop)
         I.insert(RStart, IStop, Loc);
     }
@@ -932,6 +949,13 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
     if (!I.valid())
       return;
 
+    if (I.start() < RStart) {
+      // Interval start overlaps range - trim to the scope range.
+      I.setStartUnchecked(RStart);
+      // Remember that this interval was trimmed.
+      trimmedDefs.insert(RStart);
+    }
+
     // The end of a lexical scope range is the last instruction in the
     // range. To convert to an interval we need the index of the
     // instruction after it.
@@ -1306,14 +1330,21 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
   // that the original virtual register was a pointer. Also, add the stack slot
   // offset for the spilled register to the expression.
   const DIExpression *Expr = Expression;
-  if (Spilled)
-    Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset);
+  uint8_t DIExprFlags = DIExpression::ApplyOffset;
+  bool IsIndirect = Loc.wasIndirect();
+  if (Spilled) {
+    if (IsIndirect)
+      DIExprFlags |= DIExpression::DerefAfter;
+    Expr =
+        DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
+    IsIndirect = true;
+  }
 
   assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
 
   do {
     BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
-            Spilled, MO, Variable, Expr);
+            IsIndirect, MO, Variable, Expr);
 
     // Continue and insert DBG_VALUES after every redefinition of register
     // associated with the debug value within the range
@@ -1345,6 +1376,12 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
     bool Spilled = SpillIt != SpillOffsets.end();
     unsigned SpillOffset = Spilled ? SpillIt->second : 0;
 
+    // If the interval start was trimmed to the lexical scope insert the
+    // DBG_VALUE at the previous index (otherwise it appears after the
+    // first instruction in the range).
+    if (trimmedDefs.count(Start))
+      Start = Start.getPrevIndex();
+
     LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
     MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
     SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2bec8613e79c..8294591b7326 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1393,11 +1393,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
              "Expected inlined-at fields to agree");
       // A dbg.declare describes the address of a source variable, so lower it
       // into an indirect DBG_VALUE.
-      auto *Expr = DI->getExpression();
-      Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
-              *Op, DI->getVariable(), Expr);
+              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+              *Op, DI->getVariable(), DI->getExpression());
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
@@ -1421,19 +1419,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
       if (CI->getBitWidth() > 64)
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
             .addCImm(CI)
-            .addReg(0U)
+            .addImm(0U)
             .addMetadata(DI->getVariable())
             .addMetadata(DI->getExpression());
       else
         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
             .addImm(CI->getZExtValue())
-            .addReg(0U)
+            .addImm(0U)
             .addMetadata(DI->getVariable())
             .addMetadata(DI->getExpression());
     } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
           .addFPImm(CF)
-          .addReg(0U)
+          .addImm(0U)
           .addMetadata(DI->getVariable())
           .addMetadata(DI->getExpression());
     } else if (unsigned Reg = lookUpRegForValue(V)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c613c2540628..176d71643e1a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -677,7 +677,7 @@ MachineInstr *
 InstrEmitter::EmitDbgValue(SDDbgValue *SD,
                            DenseMap<SDValue, unsigned> &VRBaseMap) {
   MDNode *Var = SD->getVariable();
-  const DIExpression *Expr = SD->getExpression();
+  MDNode *Expr = SD->getExpression();
   DebugLoc DL = SD->getDebugLoc();
   assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
@@ -701,11 +701,12 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
     // EmitTargetCodeForFrameDebugValue is responsible for allocation.
     auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
                        .addFrameIndex(SD->getFrameIx());
-
     if (SD->isIndirect())
-      Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
-    FrameMI.addReg(0);
+      // Push [fi + 0] onto the DIExpression stack.
+      FrameMI.addImm(0);
+    else
+      // Push fi onto the DIExpression stack.
+      FrameMI.addReg(0);
     return FrameMI.addMetadata(Var).addMetadata(Expr);
   }
   // Otherwise, we're going to create an instruction here.
@@ -751,9 +752,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
 
   // Indirect addressing is indicated by an Imm as the second parameter.
   if (SD->isIndirect())
-    Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
-  MIB.addReg(0U, RegState::Debug);
+    MIB.addImm(0U);
+  else
+    MIB.addReg(0U, RegState::Debug);
 
   MIB.addMetadata(Var);
   MIB.addMetadata(Expr);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 974914d00d05..d809139d3807 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4716,11 +4716,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
     break;
   case ISD::VECREDUCE_FMAX:
     NeutralElem = DAG.getConstantFP(
-        std::numeric_limits<double>::infinity(), dl, ElemVT);
+        -std::numeric_limits<double>::infinity(), dl, ElemVT);
     break;
   case ISD::VECREDUCE_FMIN:
     NeutralElem = DAG.getConstantFP(
-        -std::numeric_limits<double>::infinity(), dl, ElemVT);
+        std::numeric_limits<double>::infinity(), dl, ElemVT);
     break;
   }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 728d963a916f..421ff3e7d472 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5622,6 +5622,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
 
+  bool IsIndirect = false;
   Optional<MachineOperand> Op;
   // Some arguments' frame index is recorded during argument lowering.
   int FI = FuncInfo.getArgumentFrameIndex(Arg);
@@ -5643,6 +5644,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
     if (Reg) {
       Op = MachineOperand::CreateReg(Reg, false);
+      IsIndirect = IsDbgDeclare;
     }
   }
 
@@ -5691,7 +5693,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
         }
         assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
         FuncInfo.ArgDbgValues.push_back(
-          BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+          BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
                   RegAndSize.first, Variable, *FragmentExpr));
       }
     };
@@ -5709,6 +5711,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
       }
 
       Op = MachineOperand::CreateReg(VMI->second, false);
+      IsIndirect = IsDbgDeclare;
     } else if (ArgRegsAndSizes.size() > 1) {
       // This was split due to the calling convention, and no virtual register
       // mapping exists for the value.
@@ -5722,28 +5725,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
 
   assert(Variable->isValidLocationForIntrinsic(DL) &&
          "Expected inlined-at fields to agree");
-
-  // If the argument arrives in a stack slot, then what the IR thought was a
-  // normal Value is actually in memory, and we must add a deref to load it.
-  if (Op->isFI()) {
-    int FI = Op->getIndex();
-    unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI);
-    if (Expr->isImplicit()) {
-      SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
-      Expr = DIExpression::prependOpcodes(Expr, Ops);
-    } else {
-      Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
-    }
-  }
-
-  // If this location was specified with a dbg.declare, then it and its
-  // expression calculate the address of the variable. Append a deref to
-  // force it to be a memory location.
-  if (IsDbgDeclare)
-    Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
+  IsIndirect = (Op->isReg()) ? IsIndirect : true;
   FuncInfo.ArgDbgValues.push_back(
-      BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+      BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
               *Op, Variable, Expr));
 
   return true;
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 40bc36c3030b..9d4fdc6b624c 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -960,7 +960,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
   }
 
   // Remap all instructions to the new stack slots.
-  std::vector<std::vector<MachineMemOperand *>> SSRefs(MFI->getObjectIndexEnd());
+  std::vector<std::vector<MachineMemOperand *>> SSRefs(
+      MFI->getObjectIndexEnd());
   for (MachineBasicBlock &BB : *MF)
     for (MachineInstr &I : BB) {
       // Skip lifetime markers. We'll remove them soon.
@@ -1074,12 +1075,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
     }
 
   // Rewrite MachineMemOperands that reference old frame indices.
-  for (auto E : enumerate(SSRefs)) {
-    const PseudoSourceValue *NewSV =
-        MF->getPSVManager().getFixedStack(SlotRemap[E.index()]);
-    for (MachineMemOperand *Ref : E.value())
-      Ref->setValue(NewSV);
-  }
+  for (auto E : enumerate(SSRefs))
+    if (!E.value().empty()) {
+      const PseudoSourceValue *NewSV =
+          MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
+      for (MachineMemOperand *Ref : E.value())
+        Ref->setValue(NewSV);
+    }
 
   // Update the location of C++ catch objects for the MSVC personality routine.
   if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 4522484222f5..e8b39c037693 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -847,8 +847,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
 
   // Iterate through, and add to, a tree of operands and users in the use-def.
   while (!WorkList.empty()) {
-    Value *V = WorkList.back();
-    WorkList.pop_back();
+    Value *V = WorkList.pop_back_val();
     if (CurrentVisited.count(V))
       continue;
 
@@ -917,7 +916,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
      ++ToPromote;
    }
 
-  // DAG optimisations should be able to handle these cases better, especially
+  // DAG optimizations should be able to handle these cases better, especially
   // for function arguments.
   if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
     return false;
@@ -941,6 +940,9 @@ bool TypePromotion::runOnFunction(Function &F) {
   if (!TPC)
     return false;
 
+  AllVisited.clear();
+  SafeToPromote.clear();
+  SafeWrap.clear();
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   const TargetMachine &TM = TPC->getTM<TargetMachine>();
@@ -998,6 +1000,10 @@ bool TypePromotion::runOnFunction(Function &F) {
   if (MadeChange)
     LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
 
+  AllVisited.clear();
+  SafeToPromote.clear();
+  SafeWrap.clear();
+
   return MadeChange;
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index f26835ff8a08..9c504da611e0 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -67,9 +67,11 @@ namespace orc {
 
 class PartitioningIRMaterializationUnit : public IRMaterializationUnit {
 public:
-  PartitioningIRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM,
-                                    VModuleKey K, CompileOnDemandLayer &Parent)
-      : IRMaterializationUnit(ES, std::move(TSM), std::move(K)),
+  PartitioningIRMaterializationUnit(ExecutionSession &ES,
+                                    const ManglingOptions &MO,
+                                    ThreadSafeModule TSM, VModuleKey K,
+                                    CompileOnDemandLayer &Parent)
+      : IRMaterializationUnit(ES, MO, std::move(TSM), std::move(K)),
         Parent(Parent) {}
 
   PartitioningIRMaterializationUnit(
@@ -111,7 +113,8 @@ CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) {
 CompileOnDemandLayer::CompileOnDemandLayer(
     ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr,
     IndirectStubsManagerBuilder BuildIndirectStubsManager)
-    : IRLayer(ES), BaseLayer(BaseLayer), LCTMgr(LCTMgr),
+    : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
+      LCTMgr(LCTMgr),
       BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {}
 
 void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) {
@@ -136,27 +139,23 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R,
   TSM.withModuleDo([&](Module &M) {
     // First, do some cleanup on the module:
     cleanUpModule(M);
-
-    MangleAndInterner Mangle(ES, M.getDataLayout());
-    for (auto &GV : M.global_values()) {
-      if (GV.isDeclaration() || GV.hasLocalLinkage() ||
-          GV.hasAppendingLinkage())
-        continue;
-
-      auto Name = Mangle(GV.getName());
-      auto Flags = JITSymbolFlags::fromGlobalValue(GV);
-      if (Flags.isCallable())
-        Callables[Name] = SymbolAliasMapEntry(Name, Flags);
-      else
-        NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
-    }
   });
 
+  for (auto &KV : R.getSymbols()) {
+    auto &Name = KV.first;
+    auto &Flags = KV.second;
+    if (Flags.isCallable())
+      Callables[Name] = SymbolAliasMapEntry(Name, Flags);
+    else
+      NonCallables[Name] = SymbolAliasMapEntry(Name, Flags);
+  }
+
   // Create a partitioning materialization unit and lodge it with the
   // implementation dylib.
   if (auto Err = PDR.getImplDylib().define(
           std::make_unique<PartitioningIRMaterializationUnit>(
-              ES, std::move(TSM), R.getVModuleKey(), *this))) {
+              ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(),
+              *this))) {
     ES.reportError(std::move(Err));
     R.failMaterialization();
     return;
@@ -316,7 +315,7 @@ void CompileOnDemandLayer::emitPartition(
   }
 
   R.replace(std::make_unique<PartitioningIRMaterializationUnit>(
-      ES, std::move(TSM), R.getVModuleKey(), *this));
+      ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this));
   BaseLayer.emit(std::move(R), std::move(*ExtractedTSM));
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index f5671d90420a..160e5ba50311 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -24,11 +24,20 @@
 namespace llvm {
 namespace orc {
 
+IRMaterializationUnit::ManglingOptions
+irManglingOptionsFromTargetOptions(const TargetOptions &Opts) {
+  IRMaterializationUnit::ManglingOptions MO;
+
+  MO.EmulatedTLS = Opts.EmulatedTLS;
+
+  return MO;
+}
+
 /// Compile a Module to an ObjectFile.
-SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
+Expected<SimpleCompiler::CompileResult> SimpleCompiler::operator()(Module &M) {
   CompileResult CachedObject = tryToLoadFromObjectCache(M);
   if (CachedObject)
-    return CachedObject;
+    return std::move(CachedObject);
 
   SmallVector<char, 0> ObjBufferSV;
 
@@ -38,7 +47,8 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
     legacy::PassManager PM;
     MCContext *Ctx;
     if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
-      llvm_unreachable("Target does not support MC emission.");
+      return make_error<StringError>("Target does not support MC emission",
+                                     inconvertibleErrorCode());
     PM.run(M);
   }
 
@@ -47,14 +57,11 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
 
   auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
 
-  if (Obj) {
-    notifyObjectCompiled(M, *ObjBuffer);
-    return std::move(ObjBuffer);
-  }
+  if (!Obj)
+    return Obj.takeError();
 
-  // TODO: Actually report errors helpfully.
-  consumeError(Obj.takeError());
-  return nullptr;
+  notifyObjectCompiled(M, *ObjBuffer);
+  return std::move(ObjBuffer);
 }
 
 SimpleCompiler::CompileResult
@@ -73,9 +80,11 @@ void SimpleCompiler::notifyObjectCompiled(const Module &M,
 
 ConcurrentIRCompiler::ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
                                            ObjectCache *ObjCache)
-    : JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
+    : IRCompiler(irManglingOptionsFromTargetOptions(JTMB.getOptions())),
+      JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
 
-std::unique_ptr<MemoryBuffer> ConcurrentIRCompiler::operator()(Module &M) {
+Expected<std::unique_ptr<MemoryBuffer>>
+ConcurrentIRCompiler::operator()(Module &M) {
   auto TM = cantFail(JTMB.createTargetMachine());
   SimpleCompiler C(*TM, ObjCache);
   return C(M);
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 63ef889dae46..ec706cf63d35 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -468,15 +468,19 @@ Error MaterializationResponsibility::notifyEmitted() {
 }
 
 Error MaterializationResponsibility::defineMaterializing(
-    const SymbolFlagsMap &NewSymbolFlags) {
-  // Add the given symbols to this responsibility object.
-  // It's ok if we hit a duplicate here: In that case the new version will be
-  // discarded, and the JITDylib::defineMaterializing method will return a
-  // duplicate symbol error.
-  for (auto &KV : NewSymbolFlags)
-    SymbolFlags.insert(KV);
+    SymbolFlagsMap NewSymbolFlags) {
 
-  return JD.defineMaterializing(NewSymbolFlags);
+  LLVM_DEBUG({
+      dbgs() << "In " << JD.getName() << " defining materializing symbols "
+             << NewSymbolFlags << "\n";
+    });
+  if (auto AcceptedDefs = JD.defineMaterializing(std::move(NewSymbolFlags))) {
+    // Add all newly accepted symbols to this responsibility object.
+    for (auto &KV : *AcceptedDefs)
+      SymbolFlags.insert(KV);
+    return Error::success();
+  } else
+    return AcceptedDefs.takeError();
 }
 
 void MaterializationResponsibility::failMaterialization() {
@@ -809,31 +813,52 @@ void JITDylib::removeGenerator(DefinitionGenerator &G) {
   });
 }
 
-Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
-  return ES.runSessionLocked([&]() -> Error {
+Expected<SymbolFlagsMap>
+JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
+
+  return ES.runSessionLocked([&]() -> Expected<SymbolFlagsMap> {
     std::vector<SymbolTable::iterator> AddedSyms;
+    std::vector<SymbolFlagsMap::iterator> RejectedWeakDefs;
 
-    for (auto &KV : SymbolFlags) {
-      SymbolTable::iterator EntryItr;
-      bool Added;
+    for (auto SFItr = SymbolFlags.begin(), SFEnd = SymbolFlags.end();
+         SFItr != SFEnd; ++SFItr) {
 
-      std::tie(EntryItr, Added) =
-          Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second)));
+      auto &Name = SFItr->first;
+      auto &Flags = SFItr->second;
 
-      if (Added) {
-        AddedSyms.push_back(EntryItr);
-        EntryItr->second.setState(SymbolState::Materializing);
-      } else {
-        // Remove any symbols already added.
-        for (auto &SI : AddedSyms)
-          Symbols.erase(SI);
+      auto EntryItr = Symbols.find(Name);
 
-        // FIXME: Return all duplicates.
-        return make_error<DuplicateDefinition>(*KV.first);
-      }
+      // If the entry already exists...
+      if (EntryItr != Symbols.end()) {
+
+        // If this is a strong definition then error out.
+        if (!Flags.isWeak()) {
+          // Remove any symbols already added.
+          for (auto &SI : AddedSyms)
+            Symbols.erase(SI);
+
+          // FIXME: Return all duplicates.
+          return make_error<DuplicateDefinition>(*Name);
+        }
+
+        // Otherwise just make a note to discard this symbol after the loop.
+        RejectedWeakDefs.push_back(SFItr);
+        continue;
+      } else
+        EntryItr =
+          Symbols.insert(std::make_pair(Name, SymbolTableEntry(Flags))).first;
+
+      AddedSyms.push_back(EntryItr);
+      EntryItr->second.setState(SymbolState::Materializing);
     }
 
-    return Error::success();
+    // Remove any rejected weak definitions from the SymbolFlags map.
+    while (!RejectedWeakDefs.empty()) {
+      SymbolFlags.erase(RejectedWeakDefs.back());
+      RejectedWeakDefs.pop_back();
+    }
+
+    return SymbolFlags;
   });
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index d311f34179c7..023940dc8298 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -11,9 +11,14 @@
 namespace llvm {
 namespace orc {
 
+IRCompileLayer::IRCompiler::~IRCompiler() {}
+
 IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer,
-                                 CompileFunction Compile)
-    : IRLayer(ES), BaseLayer(BaseLayer), Compile(std::move(Compile)) {}
+                               std::unique_ptr<IRCompiler> Compile)
+    : IRLayer(ES, ManglingOpts), BaseLayer(BaseLayer),
+      Compile(std::move(Compile)) {
+  ManglingOpts = &this->Compile->getManglingOptions();
+}
 
 void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) {
   std::lock_guard<std::mutex> Lock(IRLayerMutex);
@@ -24,7 +29,7 @@ void IRCompileLayer::emit(MaterializationResponsibility R,
                           ThreadSafeModule TSM) {
   assert(TSM && "Module must not be null");
 
-  if (auto Obj = TSM.withModuleDo(Compile)) {
+  if (auto Obj = TSM.withModuleDo(*Compile)) {
     {
       std::lock_guard<std::mutex> Lock(IRLayerMutex);
       if (NotifyCompiled)
diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 845ecc71eb87..511248f83b25 100644
--- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -12,10 +12,10 @@
 namespace llvm {
 namespace orc {
 
-IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
-                                     IRLayer &BaseLayer,
-                                     TransformFunction Transform)
-    : IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+                                   TransformFunction Transform)
+    : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
+      Transform(std::move(Transform)) {}
 
 void IRTransformLayer::emit(MaterializationResponsibility R,
                             ThreadSafeModule TSM) {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 54473ab46423..6189056b3d9f 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -96,8 +96,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
   auto ObjLinkingLayer =
       std::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
 
-  if (S.JTMB->getTargetTriple().isOSBinFormatCOFF())
+  if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) {
     ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true);
+    ObjLinkingLayer->setAutoClaimResponsibilityForObjectSymbols(true);
+  }
 
   // FIXME: Explicit conversion to std::unique_ptr<ObjectLayer> added to silence
   //        errors from some GCC / libstdc++ bots. Remove this conversion (i.e.
@@ -105,7 +107,7 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
   return std::unique_ptr<ObjectLayer>(std::move(ObjLinkingLayer));
 }
 
-Expected<IRCompileLayer::CompileFunction>
+Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>
 LLJIT::createCompileFunction(LLJITBuilderState &S,
                              JITTargetMachineBuilder JTMB) {
 
@@ -116,13 +118,13 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
   // Otherwise default to creating a SimpleCompiler, or ConcurrentIRCompiler,
   // depending on the number of threads requested.
   if (S.NumCompileThreads > 0)
-    return ConcurrentIRCompiler(std::move(JTMB));
+    return std::make_unique<ConcurrentIRCompiler>(std::move(JTMB));
 
   auto TM = JTMB.createTargetMachine();
   if (!TM)
     return TM.takeError();
 
-  return TMOwningSimpleCompiler(std::move(*TM));
+  return std::make_unique<TMOwningSimpleCompiler>(std::move(*TM));
 }
 
 LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 580e2682ec8c..ebc7801f11ff 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Debug.h"
 
@@ -15,15 +16,15 @@
 namespace llvm {
 namespace orc {
 
-IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {}
 IRLayer::~IRLayer() {}
 
 Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
   return JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
-      *this, std::move(K), std::move(TSM)));
+      *this, *getManglingOptions(), std::move(TSM), std::move(K)));
 }
 
 IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
+                                             const ManglingOptions &MO,
                                              ThreadSafeModule TSM, VModuleKey K)
     : MaterializationUnit(SymbolFlagsMap(), std::move(K)), TSM(std::move(TSM)) {
 
@@ -32,12 +33,44 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES,
   MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout());
   this->TSM.withModuleDo([&](Module &M) {
     for (auto &G : M.global_values()) {
-      if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() &&
-          !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) {
-        auto MangledName = Mangle(G.getName());
-        SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
-        SymbolToDefinition[MangledName] = &G;
+      // Skip globals that don't generate symbols.
+      if (!G.hasName() || G.isDeclaration() || G.hasLocalLinkage() ||
+          G.hasAvailableExternallyLinkage() || G.hasAppendingLinkage())
+        continue;
+
+      // thread locals generate different symbols depending on whether or not
+      // emulated TLS is enabled.
+      if (G.isThreadLocal() && MO.EmulatedTLS) {
+        auto &GV = cast<GlobalVariable>(G);
+
+        auto Flags = JITSymbolFlags::fromGlobalValue(GV);
+
+        auto EmuTLSV = Mangle(("__emutls_v." + GV.getName()).str());
+        SymbolFlags[EmuTLSV] = Flags;
+        SymbolToDefinition[EmuTLSV] = &GV;
+
+        // If this GV has a non-zero initializer we'll need to emit an
+        // __emutls_t symbol too.
+        if (GV.hasInitializer()) {
+          const auto *InitVal = GV.getInitializer();
+
+          // Skip zero-initializers.
+          if (isa<ConstantAggregateZero>(InitVal))
+            continue;
+          const auto *InitIntValue = dyn_cast<ConstantInt>(InitVal);
+          if (InitIntValue && InitIntValue->isZero())
+            continue;
+
+          auto EmuTLST = Mangle(("__emutls_t." + GV.getName()).str());
+          SymbolFlags[EmuTLST] = Flags;
+        }
+        continue;
       }
+
+      // Otherwise we just need a normal linker mangling.
+      auto MangledName = Mangle(G.getName());
+      SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G);
+      SymbolToDefinition[MangledName] = &G;
     }
   });
 }
@@ -72,8 +105,8 @@ void IRMaterializationUnit::discard(const JITDylib &JD,
 }
 
 BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit(
-    IRLayer &L, VModuleKey K, ThreadSafeModule TSM)
-    : IRMaterializationUnit(L.getExecutionSession(), std::move(TSM),
+    IRLayer &L, const ManglingOptions &MO, ThreadSafeModule TSM, VModuleKey K)
+    : IRMaterializationUnit(L.getExecutionSession(), MO, std::move(TSM),
                             std::move(K)),
       L(L), K(std::move(K)) {}
 
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index a92264c0be14..ff8289a264c8 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/Object/COFF.h"
 
 namespace {
 
@@ -160,6 +161,39 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
     std::set<StringRef> &InternalSymbols) {
   SymbolFlagsMap ExtraSymbolsToClaim;
   SymbolMap Symbols;
+
+  // Hack to support COFF constant pool comdats introduced during compilation:
+  // (See http://llvm.org/PR40074)
+  if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(&Obj)) {
+    auto &ES = getExecutionSession();
+
+  // For all resolved symbols that are not already in the responsibility set:
+    // check whether the symbol is in a comdat section and if so mark it as
+    // weak.
+    for (auto &Sym : COFFObj->symbols()) {
+      if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
+        continue;
+      auto Name = Sym.getName();
+      if (!Name)
+        return Name.takeError();
+      auto I = Resolved.find(*Name);
+
+      // Skip unresolved symbols, internal symbols, and symbols that are
+      // already in the responsibility set.
+      if (I == Resolved.end() || InternalSymbols.count(*Name) ||
+          R.getSymbols().count(ES.intern(*Name)))
+        continue;
+      auto Sec = Sym.getSection();
+      if (!Sec)
+        return Sec.takeError();
+      if (*Sec == COFFObj->section_end())
+        continue;
+      auto &COFFSec = *COFFObj->getCOFFSection(**Sec);
+      if (COFFSec.Characteristics & COFF::IMAGE_SCN_LNK_COMDAT)
+        I->second.setFlags(I->second.getFlags() | JITSymbolFlags::Weak);
+    }
+  }
+
   for (auto &KV : Resolved) {
     // Scan the symbols and add them to the Symbols map for resolution.
 
@@ -184,10 +218,17 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
     Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags);
   }
 
-  if (!ExtraSymbolsToClaim.empty())
+  if (!ExtraSymbolsToClaim.empty()) {
     if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
       return Err;
 
+    // If we claimed responsibility for any weak symbols but were rejected then
+    // we need to remove them from the resolved set.
+    for (auto &KV : ExtraSymbolsToClaim)
+      if (KV.second.isWeak() && !R.getSymbols().count(KV.first))
+        Symbols.erase(KV.first);
+  }
+
   if (auto Err = R.notifyResolved(Symbols)) {
     R.failMaterialization();
     return Err;
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index acf0e4afef27..1f978d136049 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2651,8 +2651,10 @@ void AssemblyWriter::printModule(const Module *M) {
   printUseLists(nullptr);
 
   // Output all of the functions.
-  for (const Function &F : *M)
+  for (const Function &F : *M) {
+    Out << '\n';
     printFunction(&F);
+  }
   assert(UseListOrders.empty() && "All use-lists should have been consumed");
 
   // Output all attribute groups.
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index e13656ed1c10..af934cc8b9be 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1277,11 +1277,17 @@ Error IRLinker::linkModuleFlagsMetadata() {
     }
 
     // Diagnose inconsistent merge behavior types.
-    if (SrcBehaviorValue != DstBehaviorValue)
-      return stringErr("linking module flags '" + ID->getString() +
-                       "': IDs have conflicting behaviors in '" +
-                       SrcM->getModuleIdentifier() + "' and '" +
-                       DstM.getModuleIdentifier() + "'");
+    if (SrcBehaviorValue != DstBehaviorValue) {
+      bool MaxAndWarn = (SrcBehaviorValue == Module::Max &&
+                         DstBehaviorValue == Module::Warning) ||
+                        (DstBehaviorValue == Module::Max &&
+                         SrcBehaviorValue == Module::Warning);
+      if (!MaxAndWarn)
+        return stringErr("linking module flags '" + ID->getString() +
+                         "': IDs have conflicting behaviors in '" +
+                         SrcM->getModuleIdentifier() + "' and '" +
+                         DstM.getModuleIdentifier() + "'");
+    }
 
     auto replaceDstValue = [&](MDNode *New) {
       Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
@@ -1290,6 +1296,40 @@ Error IRLinker::linkModuleFlagsMetadata() {
       Flags[ID].first = Flag;
     };
 
+    // Emit a warning if the values differ and either source or destination
+    // request Warning behavior.
+    if ((DstBehaviorValue == Module::Warning ||
+         SrcBehaviorValue == Module::Warning) &&
+        SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+      std::string Str;
+      raw_string_ostream(Str)
+          << "linking module flags '" << ID->getString()
+          << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
+          << "' from " << SrcM->getModuleIdentifier() << " with '"
+          << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
+          << ')';
+      emitWarning(Str);
+    }
+
+    // Choose the maximum if either source or destination request Max behavior.
+    if (DstBehaviorValue == Module::Max || SrcBehaviorValue == Module::Max) {
+      ConstantInt *DstValue =
+          mdconst::extract<ConstantInt>(DstOp->getOperand(2));
+      ConstantInt *SrcValue =
+          mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
+
+      // The resulting flag should have a Max behavior, and contain the maximum
+      // value from between the source and destination values.
+      Metadata *FlagOps[] = {
+          (DstBehaviorValue != Module::Max ? SrcOp : DstOp)->getOperand(0), ID,
+          (SrcValue->getZExtValue() > DstValue->getZExtValue() ? SrcOp : DstOp)
+              ->getOperand(2)};
+      MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+      DstModFlags->setOperand(DstIndex, Flag);
+      Flags[ID].first = Flag;
+      continue;
+    }
+
     // Perform the merge for standard behavior types.
     switch (SrcBehaviorValue) {
     case Module::Require:
@@ -1305,26 +1345,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
       continue;
     }
     case Module::Warning: {
-      // Emit a warning if the values differ.
-      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
-        std::string str;
-        raw_string_ostream(str)
-            << "linking module flags '" << ID->getString()
-            << "': IDs have conflicting values ('" << *SrcOp->getOperand(2)
-            << "' from " << SrcM->getModuleIdentifier() << " with '"
-            << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier()
-            << ')';
-        emitWarning(str);
-      }
-      continue;
+      break;
     }
     case Module::Max: {
-      ConstantInt *DstValue =
-          mdconst::extract<ConstantInt>(DstOp->getOperand(2));
-      ConstantInt *SrcValue =
-          mdconst::extract<ConstantInt>(SrcOp->getOperand(2));
-      if (SrcValue->getZExtValue() > DstValue->getZExtValue())
-        overrideDstValue();
       break;
     }
     case Module::Append: {
@@ -1350,6 +1373,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
       break;
     }
     }
+
   }
 
   // Check all of the requirements.
diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp
index a3dba1a3aa10..2bc668beed32 100644
--- a/llvm/lib/Support/CRC.cpp
+++ b/llvm/lib/Support/CRC.cpp
@@ -85,7 +85,15 @@ uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
 
 #include <zlib.h>
 uint32_t llvm::crc32(uint32_t CRC, ArrayRef<uint8_t> Data) {
-  return ::crc32(CRC, (const Bytef *)Data.data(), Data.size());
+  // Zlib's crc32() only takes a 32-bit length, so we have to iterate for larger
+  // sizes. One could use crc32_z() instead, but that's a recent (2017) addition
+  // and may not be available on all systems.
+  do {
+    ArrayRef<uint8_t> Slice = Data.take_front(UINT32_MAX);
+    CRC = ::crc32(CRC, (const Bytef *)Slice.data(), (uInt)Slice.size());
+    Data = Data.drop_front(Slice.size());
+  } while (Data.size() > 0);
+  return CRC;
 }
 
 #endif
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index b9031f52375c..356835609830 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -14,9 +14,6 @@
 #include "llvm/Support/ThreadLocal.h"
 #include <mutex>
 #include <setjmp.h>
-#ifdef _WIN32
-#include <excpt.h> // for GetExceptionInformation
-#endif
 #if LLVM_ON_UNIX
 #include <sysexits.h> // EX_IOERR
 #endif
@@ -41,11 +38,11 @@ struct CrashRecoveryContextImpl {
   ::jmp_buf JumpBuffer;
   volatile unsigned Failed : 1;
   unsigned SwitchedThread : 1;
+  unsigned ValidJumpBuffer : 1;
 
 public:
-  CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
-                                                        Failed(false),
-                                                        SwitchedThread(false) {
+  CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept
+      : CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) {
     Next = CurrentContext->get();
     CurrentContext->set(this);
   }
@@ -80,10 +77,13 @@ public:
     CRC->RetCode = RetCode;
 
     // Jump back to the RunSafely we were called under.
-    longjmp(JumpBuffer, 1);
+    if (ValidJumpBuffer)
+      longjmp(JumpBuffer, 1);
+
+    // Otherwise let the caller decide the outcome of the crash. Currently
+    // this occurs when using SEH on Windows with MSVC or clang-cl.
   }
 };
-
 }
 
 static ManagedStatic<std::mutex> gCrashRecoveryContextMutex;
@@ -175,6 +175,9 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
 }
 
 #if defined(_MSC_VER)
+
+#include <windows.h> // for GetExceptionInformation
+
 // If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way
 // better than VEH. Vectored exception handling catches all exceptions happening
 // on the thread with installed exception handlers, so it can interfere with
@@ -188,30 +191,45 @@ static void uninstallExceptionOrSignalHandlers() {}
 
 // We need this function because the call to GetExceptionInformation() can only
 // occur inside the __except evaluation block
-static int ExceptionFilter(bool DumpStackAndCleanup,
-                           _EXCEPTION_POINTERS *Except) {
-  if (DumpStackAndCleanup)
-    sys::CleanupOnSignal((uintptr_t)Except);
-  return EXCEPTION_EXECUTE_HANDLER;
-}
+static int ExceptionFilter(_EXCEPTION_POINTERS *Except) {
+  // Lookup the current thread local recovery object.
+  const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
 
-static bool InvokeFunctionCall(function_ref<void()> Fn,
-                               bool DumpStackAndCleanup, int &RetCode) {
-  __try {
-    Fn();
-  } __except (ExceptionFilter(DumpStackAndCleanup, GetExceptionInformation())) {
-    RetCode = GetExceptionCode();
-    return false;
+  if (!CRCI) {
+    // Something has gone horribly wrong, so let's just tell everyone
+    // to keep searching
+    CrashRecoveryContext::Disable();
+    return EXCEPTION_CONTINUE_SEARCH;
   }
-  return true;
+
+  int RetCode = (int)Except->ExceptionRecord->ExceptionCode;
+  if ((RetCode & 0xF0000000) == 0xE0000000)
+    RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
+
+  // Handle the crash
+  const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
+      RetCode, reinterpret_cast<uintptr_t>(Except));
+
+  return EXCEPTION_EXECUTE_HANDLER;
 }
 
+#if defined(__clang__) && defined(_M_IX86)
+// Work around PR44697.
+__attribute__((optnone))
+#endif
 bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
   if (!gCrashRecoveryEnabled) {
     Fn();
     return true;
   }
-  return InvokeFunctionCall(Fn, DumpStackAndCleanupOnFailure, RetCode);
+  assert(!Impl && "Crash recovery context already initialized!");
+  Impl = new CrashRecoveryContextImpl(this);
+  __try {
+    Fn();
+  } __except (ExceptionFilter(GetExceptionInformation())) {
+    return false;
+  }
+  return true;
 }
 
 #else // !_MSC_VER
@@ -264,10 +282,13 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
   // TODO: We can capture the stack backtrace here and store it on the
   // implementation if we so choose.
 
+  int RetCode = (int)ExceptionInfo->ExceptionRecord->ExceptionCode;
+  if ((RetCode & 0xF0000000) == 0xE0000000)
+    RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit
+
   // Handle the crash
   const_cast<CrashRecoveryContextImpl *>(CRCI)->HandleCrash(
-      (int)ExceptionInfo->ExceptionRecord->ExceptionCode,
-      reinterpret_cast<uintptr_t>(ExceptionInfo));
+      RetCode, reinterpret_cast<uintptr_t>(ExceptionInfo));
 
   // Note that we don't actually get here because HandleCrash calls
   // longjmp, which means the HandleCrash function never returns.
@@ -388,6 +409,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
     CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
     Impl = CRCI;
 
+    CRCI->ValidJumpBuffer = true;
     if (setjmp(CRCI->JumpBuffer) != 0) {
       return false;
     }
@@ -399,12 +421,19 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
 
 #endif // !_MSC_VER
 
-void CrashRecoveryContext::HandleCrash() {
-  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+LLVM_ATTRIBUTE_NORETURN
+void CrashRecoveryContext::HandleExit(int RetCode) {
+#if defined(_WIN32)
+  // SEH and VEH
+  ::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
+#else
+  // On Unix we don't need to raise an exception, we go directly to
+  // HandleCrash(), then longjmp will unwind the stack for us.
+  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl;
   assert(CRCI && "Crash recovery context never initialized!");
-  // As per convention, -2 indicates a crash or timeout as opposed to failure to
-  // execute (see llvm/include/llvm/Support/Program.h)
-  CRCI->HandleCrash(-2, 0);
+  CRCI->HandleCrash(RetCode, 0 /*no sig num*/);
+#endif
+  llvm_unreachable("Most likely setjmp wasn't called!");
 }
 
 // FIXME: Portability.
diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp
index 0f13f7a536f1..a9463024c420 100644
--- a/llvm/lib/Support/ErrorHandling.cpp
+++ b/llvm/lib/Support/ErrorHandling.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/WindowsError.h"
@@ -122,7 +123,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
   // files registered with RemoveFileOnSignal.
   sys::RunInterruptHandlers();
 
-  exit(1);
+  sys::Process::Exit(1);
 }
 
 void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler,
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index 5b6471008159..509512f643d3 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -13,8 +13,9 @@
 #include "llvm/Support/Process.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/llvm-config.h"
 #include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
@@ -88,6 +89,13 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
 
 bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
 
+LLVM_ATTRIBUTE_NORETURN
+void Process::Exit(int RetCode) {
+  if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
+    CRC->HandleExit(RetCode);
+  ::exit(RetCode);
+}
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Process.inc"
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 8b525f1bd4ac..09e19ae41f1a 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -820,7 +820,13 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
                    << "\n";
   }
 
-  LocalPrintStackTrace(llvm::errs(), ep ? ep->ContextRecord : nullptr);
+  // Stack unwinding appears to modify the context. Copy it to preserve the
+  // caller's context.
+  CONTEXT ContextCopy;
+  if (ep)
+    memcpy(&ContextCopy, ep->ContextRecord, sizeof(ContextCopy));
+
+  LocalPrintStackTrace(llvm::errs(), ep ? &ContextCopy : nullptr);
 
   return EXCEPTION_EXECUTE_HANDLER;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b8953583a310..6da089d1859a 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1000,6 +1000,26 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   switch (MI->getOpcode()) {
   default:
     break;
+  case AArch64::HINT: {
+    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
+    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
+    // non-empty. If MI is the initial BTI, place the
+    // __patchable_function_entries label after BTI.
+    if (CurrentPatchableFunctionEntrySym &&
+        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
+        MI == &MF->front().front()) {
+      int64_t Imm = MI->getOperand(0).getImm();
+      if ((Imm & 32) && (Imm & 6)) {
+        MCInst Inst;
+        MCInstLowering.Lower(MI, Inst);
+        EmitToStreamer(*OutStreamer, Inst);
+        CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
+        OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+        return;
+      }
+    }
+    break;
+  }
     case AArch64::MOVMCSym: {
       Register DestReg = MI->getOperand(0).getReg();
       const MachineOperand &MO_Sym = MI->getOperand(1);
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index bc91d628f0b4..cbca29b63b70 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
 static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                      cl::Hidden);
 
+// Enable register renaming to find additional store pairing opportunities.
+static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
+                                    cl::init(false), cl::Hidden);
+
 #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
 
 namespace {
@@ -1446,6 +1450,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   Optional<bool> MaybeCanRename = None;
+  if (!EnableRenaming)
+    MaybeCanRename = {false};
+
   SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
   LiveRegUnits UsedInBetween;
   UsedInBetween.init(*TRI);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 6f4569a49783..131219ca6944 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -183,7 +183,21 @@ public:
                                      bool &AllowPromotionWithoutCommonHeader);
 
   bool shouldExpandReduction(const IntrinsicInst *II) const {
-    return false;
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul:
+      // We don't have legalization support for ordered FP reductions.
+      return !II->getFastMathFlags().allowReassoc();
+
+    case Intrinsic::experimental_vector_reduce_fmax:
+    case Intrinsic::experimental_vector_reduce_fmin:
+      // Lowering asserts that there are no NaNs.
+      return !II->getFastMathFlags().noNaNs();
+
+    default:
+      // Don't expand anything else, let legalization deal with it.
+      return false;
+    }
   }
 
   unsigned getGISelRematGlobalCost() const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index fbed51de0ea4..a55a1747cafe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID;
 void initializeSILowerControlFlowPass(PassRegistry &);
 extern char &SILowerControlFlowID;
 
-void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
-extern char &SIRemoveShortExecBranchesID;
-
 void initializeSIInsertSkipsPass(PassRegistry &);
 extern char &SIInsertSkipsPassID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eb30d659bf0b..c8dc6f6e3bf4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIModeRegisterPass(*PR);
   initializeSIWholeQuadModePass(*PR);
   initializeSILowerControlFlowPass(*PR);
-  initializeSIRemoveShortExecBranchesPass(*PR);
   initializeSIInsertSkipsPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
@@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() {
   // be better for it to emit S_NOP <N> when possible.
   addPass(&PostRAHazardRecognizerID);
 
-  addPass(&SIRemoveShortExecBranchesID);
   addPass(&SIInsertSkipsPassID);
   addPass(&BranchRelaxationPassID);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 191f603a66d6..01bb60f07f2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -34,6 +34,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -117,24 +118,58 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
   return true;
 }
 
+static void removeDoneExport(Function &F) {
+  ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext());
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) {
+        if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
+          Intrin->setArgOperand(6, BoolFalse); // done
+        } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
+          Intrin->setArgOperand(4, BoolFalse); // done
+        }
+      }
+    }
+  }
+}
+
 static BasicBlock *unifyReturnBlockSet(Function &F,
                                        ArrayRef<BasicBlock *> ReturningBlocks,
+                                       bool InsertExport,
                                        const TargetTransformInfo &TTI,
                                        StringRef Name) {
   // Otherwise, we need to insert a new basic block into the function, add a PHI
   // nodes (if the function returns values), and convert all of the return
   // instructions into unconditional branches.
   BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
+  IRBuilder<> B(NewRetBlock);
+
+  if (InsertExport) {
+    // Ensure that there's only one "done" export in the shader by removing the
+    // "done" bit set on the original final export. More than one "done" export
+    // can lead to undefined behavior.
+    removeDoneExport(F);
+
+    Value *Undef = UndefValue::get(B.getFloatTy());
+    B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
+                      {
+                        B.getInt32(9), // target, SQ_EXP_NULL
+                        B.getInt32(0), // enabled channels
+                        Undef, Undef, Undef, Undef, // values
+                        B.getTrue(), // done
+                        B.getTrue(), // valid mask
+                      });
+  }
 
   PHINode *PN = nullptr;
   if (F.getReturnType()->isVoidTy()) {
-    ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+    B.CreateRetVoid();
   } else {
     // If the function doesn't return void... add a PHI node to the block...
-    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
-                         "UnifiedRetVal");
-    NewRetBlock->getInstList().push_back(PN);
-    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+    PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
+                     "UnifiedRetVal");
+    assert(!InsertExport);
+    B.CreateRet(PN);
   }
 
   // Loop over all of the blocks, replacing the return instruction with an
@@ -173,6 +208,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
   // Dummy return block for infinite loop.
   BasicBlock *DummyReturnBB = nullptr;
 
+  bool InsertExport = false;
+
   for (BasicBlock *BB : PDT.getRoots()) {
     if (isa<ReturnInst>(BB->getTerminator())) {
       if (!isUniformlyReached(DA, *BB))
@@ -188,6 +225,36 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
                                            "DummyReturnBlock", &F);
         Type *RetTy = F.getReturnType();
         Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+
+        // For pixel shaders, the producer guarantees that an export is
+        // executed before each return instruction. However, if there is an
+        // infinite loop and we insert a return ourselves, we need to uphold
+        // that guarantee by inserting a null export. This can happen e.g. in
+        // an infinite loop with kill instructions, which is supposed to
+        // terminate. However, we don't need to do this if there is a non-void
+        // return value, since then there is an epilog afterwards which will
+        // still export.
+        //
+        // Note: In the case where only some threads enter the infinite loop,
+        // this can result in the null export happening redundantly after the
+        // original exports. However, the last "real" export happens after all
+        // the threads that didn't enter an infinite loop converged, which
+        // means that the only extra threads to execute the null export are
+        // threads that entered the infinite loop, and they only could've
+        // exited through being killed which sets their exec bit to 0.
+        // Therefore, unless there's an actual infinite loop, which can have
+        // invalid results, or there's a kill after the last export, which we
+        // assume the frontend won't do, this export will have the same exec
+        // mask as the last "real" export, and therefore the valid mask will be
+        // overwritten with the same value and will still be correct. Also,
+        // even though this forces an extra unnecessary export wait, we assume
+        // that this happens rarely enough in practice that we don't have to
+        // worry about performance.
+        if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
+            RetTy->isVoidTy()) {
+          InsertExport = true;
+        }
+
         ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
         ReturningBlocks.push_back(DummyReturnBB);
       }
@@ -260,6 +327,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
   const TargetTransformInfo &TTI
     = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
-  unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
+  unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock");
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index 1a526675164a..e2978624811d 100644
--- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>;
 
 def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
 
+def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
+
 def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
 
 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
@@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
 
 
 
-def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
 class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
   CF_MEM_RAT_CACHELESS <0x14, 0, mask,
                         (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 792e26d21f98..88e554ae0bcc 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
 def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
+def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>;
+
 def SIN_eg : SIN_Common<0x8D>;
 def COS_eg : COS_Common<0x8E>;
 
 def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
-def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
 } // End SubtargetPredicate = isEG
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index cbdf0de44f87..869c183e2245 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1233,6 +1233,11 @@ def : R600Pat<
 def : RcpPat<recip_ieee, f32>;
 }
 
+class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat <
+  (fsqrt f32:$src),
+  (RecipInst (RsqInst $src))
+>;
+
 //===----------------------------------------------------------------------===//
 // R600 / R700 Instructions
 //===----------------------------------------------------------------------===//
@@ -1272,8 +1277,8 @@ let Predicates = [isR600] in {
   defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
   def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
 
-  def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
   def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
+  def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
 
   def R600_ExportSwz : ExportSwzInst {
     let Word1{20-17} = 0; // BURST_COUNT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 80c044ec00cb..87e63fcc4a04 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
 #define DEBUG_TYPE "si-insert-skips"
 
 static cl::opt<unsigned> SkipThresholdFlag(
-  "amdgpu-skip-threshold-legacy",
+  "amdgpu-skip-threshold",
   cl::desc("Number of instructions before jumping over divergent control flow"),
   cl::init(12), cl::Hidden);
 
@@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr &MI = *I;
 
       switch (MI.getOpcode()) {
-      case AMDGPU::S_CBRANCH_EXECZ:
-        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
-        break;
       case AMDGPU::SI_MASK_BRANCH:
         ExecBranchStack.push_back(MI.getOperand(0).getMBB());
         MadeChange |= skipMaskBranch(MI, MBB);
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 61d2719a3aad..bf052dc3c930 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
     BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
     .addReg(Tmp, RegState::Kill);
 
-  // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
-  // during SIRemoveShortExecBranches.
-  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+  // Insert a pseudo terminator to help keep the verifier happy. This will also
+  // be used later when inserting skips.
+  MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
                             .add(MI.getOperand(2));
 
   if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
     .addReg(DstReg);
 
   MachineInstr *Branch =
-      BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
-          .addMBB(DestBB);
+    BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+    .addMBB(DestBB);
 
   if (!LIS) {
     MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
deleted file mode 100644
index 51779e97ac62..000000000000
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass optmizes the s_cbranch_execz instructions.
-/// The pass removes this skip instruction for short branches,
-/// if there is no unwanted sideeffect in the fallthrough code sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-remove-short-exec-branches"
-
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
-    "amdgpu-skip-threshold", cl::Hidden,
-    cl::desc(
-        "Number of instructions before jumping over divergent control flow"),
-    cl::location(SkipThreshold), cl::init(12));
-
-namespace {
-
-class SIRemoveShortExecBranches : public MachineFunctionPass {
-private:
-  const SIInstrInfo *TII = nullptr;
-  bool getBlockDestinations(MachineBasicBlock &SrcMBB,
-                            MachineBasicBlock *&TrueMBB,
-                            MachineBasicBlock *&FalseMBB,
-                            SmallVectorImpl<MachineOperand> &Cond);
-  bool mustRetainExeczBranch(const MachineBasicBlock &From,
-                             const MachineBasicBlock &To) const;
-  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
-
-public:
-  static char ID;
-
-  SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
-    initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
-                "SI remove short exec branches", false, false)
-
-char SIRemoveShortExecBranches::ID = 0;
-
-char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
-
-bool SIRemoveShortExecBranches::getBlockDestinations(
-    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
-    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
-  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
-    return false;
-
-  if (!FalseMBB)
-    FalseMBB = SrcMBB.getNextNode();
-
-  return true;
-}
-
-bool SIRemoveShortExecBranches::mustRetainExeczBranch(
-    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
-  unsigned NumInstr = 0;
-  const MachineFunction *MF = From.getParent();
-
-  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
-       MBBI != End && MBBI != ToI; ++MBBI) {
-    const MachineBasicBlock &MBB = *MBBI;
-
-    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
-         I != E; ++I) {
-      // When a uniform loop is inside non-uniform control flow, the branch
-      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
-      // when EXEC = 0. We should skip the loop lest it becomes infinite.
-      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
-          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
-        return true;
-
-      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
-        return true;
-
-      // These instructions are potentially expensive even if EXEC = 0.
-      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
-          I->getOpcode() == AMDGPU::S_WAITCNT)
-        return true;
-
-      ++NumInstr;
-      if (NumInstr >= SkipThreshold)
-        return true;
-    }
-  }
-
-  return false;
-}
-
-// Returns true if the skip branch instruction is removed.
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
-                                                  MachineBasicBlock &SrcMBB) {
-  MachineBasicBlock *TrueMBB = nullptr;
-  MachineBasicBlock *FalseMBB = nullptr;
-  SmallVector<MachineOperand, 1> Cond;
-
-  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
-    return false;
-
-  // Consider only the forward branches.
-  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
-      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
-  MI.eraseFromParent();
-  SrcMBB.removeSuccessor(TrueMBB);
-
-  return true;
-}
-
-bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  TII = ST.getInstrInfo();
-  MF.RenumberBlocks();
-  bool Changed = false;
-
-  for (MachineBasicBlock &MBB : MF) {
-    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
-    if (MBBI == MBB.end())
-      continue;
-
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    case AMDGPU::S_CBRANCH_EXECZ:
-      Changed = removeExeczBranch(MI, MBB);
-      break;
-    default:
-      break;
-    }
-  }
-
-  return Changed;
-}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5271bc3aacc6..8b21b9346987 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -559,7 +559,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
 }
 
 bool shouldEmitConstantsToTextSection(const Triple &TT) {
-  return TT.getOS() == Triple::AMDPAL;
+  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
 }
 
 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 634fb89b8e89..66ad120a111f 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -330,8 +330,8 @@ void ARMConstantIslands::verify() {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// print block size and offset information - debugging
 LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
-  BBInfoVector &BBInfo = BBUtils->getBBInfo();
   LLVM_DEBUG({
+    BBInfoVector &BBInfo = BBUtils->getBBInfo();
     for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
       const BasicBlockInfo &BBI = BBInfo[J];
       dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2c3ac816219f..de4377ec5a47 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1952,24 +1952,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
-    case ARM::LOADDUAL:
-    case ARM::STOREDUAL: {
-      Register PairReg = MI.getOperand(0).getReg();
-
-      MachineInstrBuilder MIB =
-          BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                  TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
-              .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
-                      Opcode == ARM::LOADDUAL ? RegState::Define : 0)
-              .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
-                      Opcode == ARM::LOADDUAL ? RegState::Define : 0);
-      for (unsigned i = 1; i < MI.getNumOperands(); i++)
-        MIB.add(MI.getOperand(i));
-      MIB.add(predOps(ARMCC::AL));
-      MIB.cloneMemRefs(MI);
-      MI.eraseFromParent();
-      return true;
-    }
   }
 }
 
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 76a9ac12062d..9b06987178d8 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -145,8 +145,6 @@ public:
 
   // Thumb 2 Addressing Modes:
   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
-  template <unsigned Shift>
-  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                             SDValue &OffImm);
   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
@@ -1296,33 +1294,6 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
   return true;
 }
 
-template <unsigned Shift>
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
-                                           SDValue &OffImm) {
-  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
-    int RHSC;
-    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
-      Base = N.getOperand(0);
-      if (Base.getOpcode() == ISD::FrameIndex) {
-        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(
-            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
-      }
-
-      if (N.getOpcode() == ISD::SUB)
-        RHSC = -RHSC;
-      OffImm =
-          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
-      return true;
-    }
-  }
-
-  // Base only.
-  Base = N;
-  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
-  return true;
-}
-
 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
   // Match simple R - imm8 operands.
@@ -3515,26 +3486,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
-  case ARMISD::LDRD: {
-    if (Subtarget->isThumb2())
-      break; // TableGen handles isel in this case.
-    SDValue Base, RegOffset, ImmOffset;
-    const SDValue &Chain = N->getOperand(0);
-    const SDValue &Addr = N->getOperand(1);
-    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
-    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
-    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
-                                         {MVT::Untyped, MVT::Other}, Ops);
-    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
-                                                SDValue(New, 0));
-    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
-                                                SDValue(New, 0));
-    ReplaceUses(SDValue(N, 0), Lo);
-    ReplaceUses(SDValue(N, 1), Hi);
-    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::LOOP_DEC: {
     SDValue Ops[] = { N->getOperand(1),
                       N->getOperand(2),
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cf738cd66434..1e6f7d889201 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1073,8 +1073,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRA,       MVT::i64, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
-  setOperationAction(ISD::LOAD, MVT::i64, Custom);
-  setOperationAction(ISD::STORE, MVT::i64, Custom);
 
   // MVE lowers 64 bit shifts to lsll and lsrl
   // assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1598,9 +1596,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
 
-  case ARMISD::LDRD:          return "ARMISD::LDRD";
-  case ARMISD::STRD:          return "ARMISD::STRD";
-
   case ARMISD::WIN__CHKSTK:   return "ARMISD::WIN__CHKSTK";
   case ARMISD::WIN__DBZCHK:   return "ARMISD::WIN__DBZCHK";
 
@@ -9088,24 +9083,6 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
   return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
 }
 
-void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                  SelectionDAG &DAG) const {
-  LoadSDNode *LD = cast<LoadSDNode>(N);
-  EVT MemVT = LD->getMemoryVT();
-  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
-
-  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
-      !Subtarget->isThumb1Only() && LD->isVolatile()) {
-    SDLoc dl(N);
-    SDValue Result = DAG.getMemIntrinsicNode(
-        ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
-        {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
-    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
-                               Result.getValue(0), Result.getValue(1));
-    Results.append({Pair, Result.getValue(2)});
-  }
-}
-
 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
   EVT MemVT = ST->getMemoryVT();
@@ -9135,34 +9112,6 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
       ST->getMemOperand());
 }
 
-static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
-                          const ARMSubtarget *Subtarget) {
-  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
-  EVT MemVT = ST->getMemoryVT();
-  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
-
-  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
-      !Subtarget->isThumb1Only() && ST->isVolatile()) {
-    SDNode *N = Op.getNode();
-    SDLoc dl(N);
-
-    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
-                             DAG.getTargetConstant(0, dl, MVT::i32));
-    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
-                             DAG.getTargetConstant(1, dl, MVT::i32));
-
-    return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
-                                   {ST->getChain(), Lo, Hi, ST->getBasePtr()},
-                                   MemVT, ST->getMemOperand());
-  } else if (Subtarget->hasMVEIntegerOps() &&
-             ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
-               MemVT == MVT::v16i1))) {
-    return LowerPredicateStore(Op, DAG);
-  }
-
-  return SDValue();
-}
-
 static bool isZeroVector(SDValue N) {
   return (ISD::isBuildVectorAllZeros(N.getNode()) ||
           (N->getOpcode() == ARMISD::VMOVIMM &&
@@ -9350,7 +9299,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::LOAD:
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
-    return LowerSTORE(Op, DAG, Subtarget);
+    return LowerPredicateStore(Op, DAG);
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::ATOMIC_LOAD:
@@ -9452,9 +9401,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::ABS:
      lowerABS(N, Results, DAG);
      return ;
-  case ISD::LOAD:
-    LowerLOAD(N, Results, DAG);
-    break;
+
   }
   if (Res.getNode())
     Results.push_back(Res);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 1baa22a4fa56..cc74e5d875d8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -278,11 +278,7 @@ class VectorType;
       VST4_UPD,
       VST2LN_UPD,
       VST3LN_UPD,
-      VST4LN_UPD,
-
-      // Load/Store of dual registers
-      LDRD,
-      STRD
+      VST4LN_UPD
     };
 
   } // end namespace ARMISD
@@ -735,8 +731,6 @@ class VectorType;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;
-    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                   SelectionDAG &DAG) const;
 
     Register getRegisterByName(const char* RegName, LLT VT,
                                const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index ce67af6f1b49..3efe85a7d45c 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -243,12 +243,6 @@ def ARMqsub8b       : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
 def ARMqadd16b      : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
 def ARMqsub16b      : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
 
-def SDT_ARMldrd     : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMldrd         : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-def SDT_ARMstrd     : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMstrd         : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
 // Vector operations shared between NEON and MVE
 
 def ARMvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -2701,14 +2695,6 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
              Requires<[IsARM, HasV5TE]>;
 }
 
-let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
-def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr),
-                             64, IIC_iLoad_d_r, []>,
-               Requires<[IsARM, HasV5TE]> {
-  let AM = AddrMode3;
-}
-}
-
 def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
                     NoItinerary, "lda", "\t$Rt, $addr", []>;
 def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2984,19 +2970,6 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
   }
 }
 
-let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in {
-def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
-                              64, IIC_iStore_d_r, []>,
-                Requires<[IsARM, HasV5TE]> {
-  let AM = AddrMode3;
-}
-}
-
-let Predicates = [IsARM, HasV5TE] in {
-def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
-          (STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>;
-}
-
 // Indexed stores
 multiclass AI2_stridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 4193e8147f47..c5aae235f25d 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -270,8 +270,7 @@ def t2am_imm8_offset : MemOperand,
 
 // t2addrmode_imm8s4  := reg +/- (imm8 << 2)
 def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : MemOperand,
-                          ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> {
+class T2AddrMode_Imm8s4 : MemOperand {
   let EncoderMethod = "getT2AddrModeImm8s4OpValue";
   let DecoderMethod = "DecodeT2AddrModeImm8s4";
   let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -1449,8 +1448,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
 // Load doubleword
 def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
                         (ins t2addrmode_imm8s4:$addr),
-                        IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "",
-                        [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>,
+                        IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
                  Sched<[WriteLd]>;
 } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
 
@@ -1631,8 +1629,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
 let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
 def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
                        (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
-               IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "",
-               [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>,
+               IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
                Sched<[WriteST]>;
 
 // Indexed stores
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 880588adfdfd..f66083eaf187 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -171,7 +171,26 @@ public:
                              TTI::ReductionFlags Flags) const;
 
   bool shouldExpandReduction(const IntrinsicInst *II) const {
-    return false;
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::experimental_vector_reduce_v2_fadd:
+    case Intrinsic::experimental_vector_reduce_v2_fmul:
+      // We don't have legalization support for ordered FP reductions.
+      if (!II->getFastMathFlags().allowReassoc())
+        return true;
+      // Can't legalize reductions with soft floats.
+      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
+
+    case Intrinsic::experimental_vector_reduce_fmin:
+    case Intrinsic::experimental_vector_reduce_fmax:
+      // Can't legalize reductions with soft floats, and NoNan will create
+      // fminimum which we do not know how to lower.
+      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
+             !II->getFastMathFlags().noNaNs();
+
+    default:
+      // Don't expand anything else, let legalization deal with it.
+      return false;
+    }
   }
 
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index b81bf4e1320d..cbae4675cb14 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -110,6 +110,19 @@ private:
     return true;
   }
 
+  // Prevent reducing load width during SelectionDag phase.
+  // Otherwise, we may transform the following
+  //   ctx = ctx + reloc_offset
+  //   ... (*(u32 *)ctx) & 0x8000...
+  // to
+  //   ctx = ctx + reloc_offset
+  //   ... (*(u8 *)(ctx + 1)) & 0x80 ...
+  // which will be rejected by the verifier.
+  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
+                             EVT NewVT) const override {
+    return false;
+  }
+
   unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg,
                          bool isSigned) const;
 
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 5310f0f07b65..29abc9303a62 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -70,9 +70,10 @@ private:
 public:
   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
-    if (!skipFunction(MF.getFunction())) {
-      initialize(MF);
-    }
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    initialize(MF);
     return removeLD();
   }
 };
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 53562f42a184..c7efdf42a7c6 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -195,12 +195,13 @@ public:
     Parser.addAliasForDirective(".dword", ".8byte");
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
 
-    if (Options.ABIName.back() == 'f' &&
+    auto ABIName = StringRef(Options.ABIName);
+    if (ABIName.endswith("f") &&
         !getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) {
       errs() << "Hard-float 'f' ABI can't be used for a target that "
                 "doesn't support the F instruction set extension (ignoring "
                 "target-abi)\n";
-    } else if (Options.ABIName.back() == 'd' &&
+    } else if (ABIName.endswith("d") &&
                !getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) {
       errs() << "Hard-float 'd' ABI can't be used for a target that "
                 "doesn't support the D instruction set extension (ignoring "
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 82afa13aece3..770e883221d1 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -92,10 +92,13 @@ include "RISCVSystemOperands.td"
 // Registers, calling conventions, instruction descriptions.
 //===----------------------------------------------------------------------===//
 
+include "RISCVSchedule.td"
 include "RISCVRegisterInfo.td"
 include "RISCVCallingConv.td"
 include "RISCVInstrInfo.td"
 include "RISCVRegisterBanks.td"
+include "RISCVSchedRocket32.td"
+include "RISCVSchedRocket64.td"
 
 //===----------------------------------------------------------------------===//
 // RISC-V processors supported.
@@ -106,6 +109,12 @@ def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>;
 def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit,
                      FeatureRVCHints]>;
 
+def : ProcessorModel<"rocket-rv32", Rocket32Model, [FeatureRVCHints]>;
+
+def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit,
+                     FeatureRVCHints]>;
+
+
 //===----------------------------------------------------------------------===//
 // Define the RISC-V target.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 7229ebfe1db0..3ed10cca5377 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -103,7 +103,8 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
 
 // Pseudo instructions
 class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
-    : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {
+    : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo>,
+      Sched<[]> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8e9ad4965583..81f1abe8337e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -298,7 +298,8 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class BranchCC_rri<bits<3> funct3, string opcodestr>
     : RVInstB<funct3, OPC_BRANCH, (outs),
               (ins GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12),
-              opcodestr, "$rs1, $rs2, $imm12"> {
+              opcodestr, "$rs1, $rs2, $imm12">,
+      Sched<[WriteJmp]> {
   let isBranch = 1;
   let isTerminator = 1;
 }
@@ -320,13 +321,15 @@ class Store_rri<bits<3> funct3, string opcodestr>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class ALU_ri<bits<3> funct3, string opcodestr>
     : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
-              opcodestr, "$rd, $rs1, $imm12">;
+              opcodestr, "$rd, $rs1, $imm12">,
+      Sched<[WriteIALU, ReadIALU]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class Shift_ri<bit arithshift, bits<3> funct3, string opcodestr>
     : RVInstIShift<arithshift, funct3, OPC_OP_IMM, (outs GPR:$rd),
                    (ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr,
-                   "$rd, $rs1, $shamt">;
+                   "$rd, $rs1, $shamt">,
+      Sched<[WriteShift, ReadShift]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
@@ -336,19 +339,20 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
 class CSR_ir<bits<3> funct3, string opcodestr>
     : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1),
-              opcodestr, "$rd, $imm12, $rs1">;
+              opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>;
 
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
 class CSR_ii<bits<3> funct3, string opcodestr>
     : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd),
               (ins csr_sysreg:$imm12, uimm5:$rs1),
-              opcodestr, "$rd, $imm12, $rs1">;
+              opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class ShiftW_ri<bit arithshift, bits<3> funct3, string opcodestr>
     : RVInstIShiftW<arithshift, funct3, OPC_OP_IMM_32, (outs GPR:$rd),
                     (ins GPR:$rs1, uimm5:$shamt), opcodestr,
-                    "$rd, $rs1, $shamt">;
+                    "$rd, $rs1, $shamt">,
+      Sched<[WriteShift32, ReadShift32]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class ALUW_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
@@ -367,19 +371,20 @@ class Priv<string opcodestr, bits<7> funct7>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
-                  "lui", "$rd, $imm20">;
+                  "lui", "$rd, $imm20">, Sched<[WriteIALU]>;
 
 def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
-                    "auipc", "$rd, $imm20">;
+                    "auipc", "$rd, $imm20">, Sched<[WriteIALU]>;
 
 let isCall = 1 in
 def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
-                  "jal", "$rd, $imm20">;
+                  "jal", "$rd, $imm20">, Sched<[WriteJal]>;
 
 let isCall = 1 in
 def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
                    (ins GPR:$rs1, simm12:$imm12),
-                   "jalr", "$rd, ${imm12}(${rs1})">;
+                   "jalr", "$rd, ${imm12}(${rs1})">,
+           Sched<[WriteJalr, ReadJalr]>;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
 def BEQ  : BranchCC_rri<0b000, "beq">;
@@ -389,15 +394,15 @@ def BGE  : BranchCC_rri<0b101, "bge">;
 def BLTU : BranchCC_rri<0b110, "bltu">;
 def BGEU : BranchCC_rri<0b111, "bgeu">;
 
-def LB  : Load_ri<0b000, "lb">;
-def LH  : Load_ri<0b001, "lh">;
-def LW  : Load_ri<0b010, "lw">;
-def LBU : Load_ri<0b100, "lbu">;
-def LHU : Load_ri<0b101, "lhu">;
+def LB  : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
+def LH  : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
+def LW  : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
+def LBU : Load_ri<0b100, "lbu">, Sched<[WriteLDB, ReadMemBase]>;
+def LHU : Load_ri<0b101, "lhu">, Sched<[WriteLDH, ReadMemBase]>;
 
-def SB : Store_rri<0b000, "sb">;
-def SH : Store_rri<0b001, "sh">;
-def SW : Store_rri<0b010, "sw">;
+def SB : Store_rri<0b000, "sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+def SH : Store_rri<0b001, "sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+def SW : Store_rri<0b010, "sw">, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
 
 // ADDI isn't always rematerializable, but isReMaterializable will be used as
 // a hint which is verified in isReallyTriviallyReMaterializable.
@@ -418,21 +423,21 @@ def SLLI : Shift_ri<0, 0b001, "slli">;
 def SRLI : Shift_ri<0, 0b101, "srli">;
 def SRAI : Shift_ri<1, 0b101, "srai">;
 
-def ADD  : ALU_rr<0b0000000, 0b000, "add">;
-def SUB  : ALU_rr<0b0100000, 0b000, "sub">;
-def SLL  : ALU_rr<0b0000000, 0b001, "sll">;
-def SLT  : ALU_rr<0b0000000, 0b010, "slt">;
-def SLTU : ALU_rr<0b0000000, 0b011, "sltu">;
-def XOR  : ALU_rr<0b0000000, 0b100, "xor">;
-def SRL  : ALU_rr<0b0000000, 0b101, "srl">;
-def SRA  : ALU_rr<0b0100000, 0b101, "sra">;
-def OR   : ALU_rr<0b0000000, 0b110, "or">;
-def AND  : ALU_rr<0b0000000, 0b111, "and">;
+def ADD  : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SUB  : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLL  : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLT  : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def XOR  : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SRL  : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def SRA  : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def OR   : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def AND  : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
 def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
                     (ins fencearg:$pred, fencearg:$succ),
-                    "fence", "$pred, $succ"> {
+                    "fence", "$pred, $succ">, Sched<[]> {
   bits<4> pred;
   bits<4> succ;
 
@@ -441,25 +446,26 @@ def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
   let imm12 = {0b0000,pred,succ};
 }
 
-def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", ""> {
+def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", "">, Sched<[]> {
   let rs1 = 0;
   let rd = 0;
   let imm12 = {0b1000,0b0011,0b0011};
 }
 
-def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> {
+def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", "">, Sched<[]> {
   let rs1 = 0;
   let rd = 0;
   let imm12 = 0;
 }
 
-def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> {
+def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", "">, Sched<[WriteJmp]> {
   let rs1 = 0;
   let rd = 0;
   let imm12 = 0;
 }
 
-def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
+def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", "">,
+             Sched<[]> {
   let rs1 = 0;
   let rd = 0;
   let imm12 = 1;
@@ -468,7 +474,8 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> {
 // This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
 // instruction (i.e., it should always trap, if your implementation has invalid
 // instruction traps).
-def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> {
+def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", "">,
+            Sched<[]> {
   let rs1 = 0;
   let rd = 0;
   let imm12 = 0b110000000000;
@@ -486,24 +493,30 @@ def CSRRCI : CSR_ii<0b111, "csrrci">;
 /// RV64I instructions
 
 let Predicates = [IsRV64] in {
-def LWU   : Load_ri<0b110, "lwu">;
-def LD    : Load_ri<0b011, "ld">;
-def SD    : Store_rri<0b011, "sd">;
+def LWU   : Load_ri<0b110, "lwu">, Sched<[WriteLDWU, ReadMemBase]>;
+def LD    : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
+def SD    : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd),
                     (ins GPR:$rs1, simm12:$imm12),
-                    "addiw", "$rd, $rs1, $imm12">;
+                    "addiw", "$rd, $rs1, $imm12">,
+            Sched<[WriteIALU32, ReadIALU32]>;
 
 def SLLIW : ShiftW_ri<0, 0b001, "slliw">;
 def SRLIW : ShiftW_ri<0, 0b101, "srliw">;
 def SRAIW : ShiftW_ri<1, 0b101, "sraiw">;
 
-def ADDW  : ALUW_rr<0b0000000, 0b000, "addw">;
-def SUBW  : ALUW_rr<0b0100000, 0b000, "subw">;
-def SLLW  : ALUW_rr<0b0000000, 0b001, "sllw">;
-def SRLW  : ALUW_rr<0b0000000, 0b101, "srlw">;
-def SRAW  : ALUW_rr<0b0100000, 0b101, "sraw">;
+def ADDW  : ALUW_rr<0b0000000, 0b000, "addw">,
+            Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SUBW  : ALUW_rr<0b0100000, 0b000, "subw">,
+            Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SLLW  : ALUW_rr<0b0000000, 0b001, "sllw">,
+            Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SRLW  : ALUW_rr<0b0000000, 0b101, "srlw">,
+            Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SRAW  : ALUW_rr<0b0100000, 0b101, "sraw">,
+            Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
 } // Predicates = [IsRV64]
 
 //===----------------------------------------------------------------------===//
@@ -511,26 +524,26 @@ def SRAW  : ALUW_rr<0b0100000, 0b101, "sraw">;
 //===----------------------------------------------------------------------===//
 
 let isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-def URET : Priv<"uret", 0b0000000> {
+def URET : Priv<"uret", 0b0000000>, Sched<[]> {
   let rd = 0;
   let rs1 = 0;
   let rs2 = 0b00010;
 }
 
-def SRET : Priv<"sret", 0b0001000> {
+def SRET : Priv<"sret", 0b0001000>, Sched<[]> {
   let rd = 0;
   let rs1 = 0;
   let rs2 = 0b00010;
 }
 
-def MRET : Priv<"mret", 0b0011000> {
+def MRET : Priv<"mret", 0b0011000>, Sched<[]> {
   let rd = 0;
   let rs1 = 0;
   let rs2 = 0b00010;
 }
 } // isBarrier = 1, isReturn = 1, isTerminator = 1
 
-def WFI : Priv<"wfi", 0b0001000> {
+def WFI : Priv<"wfi", 0b0001000>, Sched<[]> {
   let rd = 0;
   let rs1 = 0;
   let rs2 = 0b00101;
@@ -539,7 +552,7 @@ def WFI : Priv<"wfi", 0b0001000> {
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
 def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
                          (ins GPR:$rs1, GPR:$rs2),
-                         "sfence.vma", "$rs1, $rs2"> {
+                         "sfence.vma", "$rs1, $rs2">, Sched<[]> {
   let rd = 0;
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 7321f4bd9d2f..de73c8df9367 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -77,31 +77,51 @@ multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtA] in {
-defm LR_W       : LR_r_aq_rl<0b010, "lr.w">;
-defm SC_W       : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">;
-defm AMOSWAP_W  : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">;
-defm AMOADD_W   : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">;
-defm AMOXOR_W   : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">;
-defm AMOAND_W   : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">;
-defm AMOOR_W    : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">;
-defm AMOMIN_W   : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">;
-defm AMOMAX_W   : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">;
-defm AMOMINU_W  : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">;
-defm AMOMAXU_W  : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">;
+defm LR_W       : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>;
+defm SC_W       : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">,
+                  Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>;
+defm AMOSWAP_W  : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOADD_W   : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOXOR_W   : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOAND_W   : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOOR_W    : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMIN_W   : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMAX_W   : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMINU_W  : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
+defm AMOMAXU_W  : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">,
+                  Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>;
 } // Predicates = [HasStdExtA]
 
 let Predicates = [HasStdExtA, IsRV64] in {
-defm LR_D       : LR_r_aq_rl<0b011, "lr.d">;
-defm SC_D       : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">;
-defm AMOSWAP_D  : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">;
-defm AMOADD_D   : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">;
-defm AMOXOR_D   : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">;
-defm AMOAND_D   : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">;
-defm AMOOR_D    : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">;
-defm AMOMIN_D   : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">;
-defm AMOMAX_D   : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">;
-defm AMOMINU_D  : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">;
-defm AMOMAXU_D  : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">;
+defm LR_D       : LR_r_aq_rl<0b011, "lr.d">, Sched<[WriteAtomicLDD, ReadAtomicLDD]>;
+defm SC_D       : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">,
+                  Sched<[WriteAtomicSTD, ReadAtomicSTD, ReadAtomicSTD]>;
+defm AMOSWAP_D  : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOADD_D   : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOXOR_D   : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOAND_D   : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOOR_D    : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMIN_D   : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMAX_D   : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMINU_D  : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
+defm AMOMAXU_D  : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
+                  Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
 } // Predicates = [HasStdExtA, IsRV64]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index fa0050f107b2..f68767847ade 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -282,7 +282,8 @@ let Predicates = [HasStdExtC] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in
 def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
                              (ins SP:$rs1, uimm10_lsb00nonzero:$imm),
-                             "c.addi4spn", "$rd, $rs1, $imm"> {
+                             "c.addi4spn", "$rd, $rs1, $imm">,
+                             Sched<[WriteIALU, ReadIALU]> {
   bits<5> rs1;
   let Inst{12-11} = imm{5-4};
   let Inst{10-7} = imm{9-6};
@@ -291,13 +292,15 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
 }
 
 let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FLD  : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> {
+def C_FLD  : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
+             Sched<[WriteFLD64, ReadMemBase]> {
   bits<8> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6-5} = imm{7-6};
 }
 
-def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> {
+def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
+           Sched<[WriteLDW, ReadMemBase]> {
   bits<7> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6} = imm{2};
@@ -306,7 +309,8 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> {
 
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FLW  : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> {
+def C_FLW  : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
+             Sched<[WriteFLD32, ReadMemBase]> {
   bits<7> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6} = imm{2};
@@ -314,20 +318,23 @@ def C_FLW  : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> {
 }
 
 let Predicates = [HasStdExtC, IsRV64] in
-def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> {
+def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
+           Sched<[WriteLDD, ReadMemBase]> {
   bits<8> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6-5} = imm{7-6};
 }
 
 let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FSD  : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> {
+def C_FSD  : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
+             Sched<[WriteFST64, ReadStoreData, ReadMemBase]> {
   bits<8> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6-5} = imm{7-6};
 }
 
-def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> {
+def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
+           Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
   bits<7> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6} = imm{2};
@@ -336,7 +343,8 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> {
 
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtC, HasStdExtF, IsRV32]  in
-def C_FSW  : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> {
+def C_FSW  : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
+             Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
   bits<7> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6} = imm{2};
@@ -344,14 +352,16 @@ def C_FSW  : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> {
 }
 
 let Predicates = [HasStdExtC, IsRV64] in
-def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> {
+def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>,
+           Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
   bits<8> imm;
   let Inst{12-10} = imm{5-3};
   let Inst{6-5} = imm{7-6};
 }
 
 let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">
+def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">,
+            Sched<[WriteNop]>
 {
   let Inst{6-2} = 0;
 }
@@ -359,7 +369,8 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
                         (ins GPRNoX0:$rd, simm6nonzero:$imm),
-                        "c.addi", "$rd, $imm"> {
+                        "c.addi", "$rd, $imm">,
+             Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = imm{4-0};
 }
@@ -367,7 +378,8 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
                             (ins GPRX0:$rd, immzero:$imm),
-                            "c.addi", "$rd, $imm"> {
+                            "c.addi", "$rd, $imm">,
+                 Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = 0;
   let isAsmParserOnly = 1;
@@ -377,27 +389,30 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
     DecoderNamespace = "RISCV32Only_", Defs = [X1],
     Predicates = [HasStdExtC, IsRV32]  in
 def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset),
-                       "c.jal", "$offset">;
+                       "c.jal", "$offset">, Sched<[WriteJal]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
     Predicates = [HasStdExtC, IsRV64] in
 def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb),
                          (ins GPRNoX0:$rd, simm6:$imm),
-                         "c.addiw", "$rd, $imm"> {
+                         "c.addiw", "$rd, $imm">,
+              Sched<[WriteIALU32, ReadIALU32]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = imm{4-0};
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm),
-                      "c.li", "$rd, $imm"> {
+                      "c.li", "$rd, $imm">,
+           Sched<[WriteIALU]> {
   let Inst{6-2} = imm{4-0};
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
                             (ins SP:$rd, simm10_lsb0000nonzero:$imm),
-                            "c.addi16sp", "$rd, $imm"> {
+                            "c.addi16sp", "$rd, $imm">,
+                 Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{12} = imm{9};
   let Inst{11-7} = 2;
@@ -410,78 +425,93 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb),
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd),
                        (ins c_lui_imm:$imm),
-                       "c.lui", "$rd, $imm"> {
+                       "c.lui", "$rd, $imm">,
+            Sched<[WriteIALU]> {
   let Inst{6-2} = imm{4-0};
 }
 
-def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>;
-def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>;
+def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>,
+             Sched<[WriteShift, ReadShift]>;
+def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>,
+             Sched<[WriteShift, ReadShift]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:$imm),
-                        "c.andi", "$rs1, $imm"> {
+                        "c.andi", "$rs1, $imm">,
+             Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rs1 = $rs1_wb";
   let Inst{12} = imm{5};
   let Inst{11-10} = 0b10;
   let Inst{6-2} = imm{4-0};
 }
 
-def C_SUB  : CS_ALU<0b100011, 0b00, "c.sub", GPRC>;
-def C_XOR  : CS_ALU<0b100011, 0b01, "c.xor", GPRC>;
-def C_OR   : CS_ALU<0b100011, 0b10, "c.or" , GPRC>;
-def C_AND  : CS_ALU<0b100011, 0b11, "c.and", GPRC>;
+def C_SUB  : CS_ALU<0b100011, 0b00, "c.sub", GPRC>,
+             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_XOR  : CS_ALU<0b100011, 0b01, "c.xor", GPRC>,
+             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_OR   : CS_ALU<0b100011, 0b10, "c.or" , GPRC>,
+             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def C_AND  : CS_ALU<0b100011, 0b11, "c.and", GPRC>,
+             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 
 let Predicates = [HasStdExtC, IsRV64] in {
-def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>;
-def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>;
+def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>,
+             Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>,
+             Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset),
-                     "c.j", "$offset"> {
+                     "c.j", "$offset">, Sched<[WriteJmp]> {
   let isBranch = 1;
   let isTerminator=1;
   let isBarrier=1;
 }
 
-def C_BEQZ : Bcz<0b110, "c.beqz",  seteq, GPRC>;
-def C_BNEZ : Bcz<0b111, "c.bnez",  setne, GPRC>;
+def C_BEQZ : Bcz<0b110, "c.beqz",  seteq, GPRC>, Sched<[WriteJmp]>;
+def C_BNEZ : Bcz<0b111, "c.bnez",  setne, GPRC>, Sched<[WriteJmp]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
                         (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm),
-                        "c.slli" ,"$rd, $imm"> {
+                        "c.slli" ,"$rd, $imm">,
+             Sched<[WriteShift, ReadShift]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = imm{4-0};
 }
 
 let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FLDSP  : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> {
+def C_FLDSP  : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
+               Sched<[WriteFLD64, ReadMemBase]> {
   let Inst{6-5} = imm{4-3};
   let Inst{4-2} = imm{8-6};
 }
 
-def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> {
+def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
+             Sched<[WriteLDW, ReadMemBase]> {
   let Inst{6-4} = imm{4-2};
   let Inst{3-2} = imm{7-6};
 }
 
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FLWSP  : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00> {
+def C_FLWSP  : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
+               Sched<[WriteFLD32, ReadMemBase]> {
   let Inst{6-4} = imm{4-2};
   let Inst{3-2} = imm{7-6};
 }
 
 let Predicates = [HasStdExtC, IsRV64] in
-def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> {
+def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>,
+             Sched<[WriteLDD, ReadMemBase]> {
   let Inst{6-5} = imm{4-3};
   let Inst{4-2} = imm{8-6};
 }
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
-                      "c.jr", "$rs1"> {
+                      "c.jr", "$rs1">, Sched<[WriteJmpReg]> {
   let isBranch = 1;
   let isBarrier = 1;
   let isTerminator = 1;
@@ -491,43 +521,49 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2),
-                      "c.mv", "$rs1, $rs2">;
+                      "c.mv", "$rs1, $rs2">,
+           Sched<[WriteIALU, ReadIALU]>;
 
 let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
-def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">;
+def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">, Sched<[]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
     isCall=1, Defs=[X1], rs2 = 0 in
 def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1),
-                        "c.jalr", "$rs1">;
+                        "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rs1_wb),
                        (ins GPRNoX0:$rs1, GPRNoX0:$rs2),
-                       "c.add", "$rs1, $rs2"> {
+                       "c.add", "$rs1, $rs2">,
+            Sched<[WriteIALU, ReadIALU, ReadIALU]> {
   let Constraints = "$rs1 = $rs1_wb";
 }
 
 let Predicates = [HasStdExtC, HasStdExtD] in
-def C_FSDSP  : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000> {
+def C_FSDSP  : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
+               Sched<[WriteFST64, ReadStoreData, ReadMemBase]> {
   let Inst{12-10} = imm{5-3};
   let Inst{9-7}   = imm{8-6};
 }
 
-def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> {
+def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
+             Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
   let Inst{12-9} = imm{5-2};
   let Inst{8-7}  = imm{7-6};
 }
 
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
-def C_FSWSP  : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00> {
+def C_FSWSP  : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
+               Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
   let Inst{12-9} = imm{5-2};
   let Inst{8-7}  = imm{7-6};
 }
 
 let Predicates = [HasStdExtC, IsRV64] in
-def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
+def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>,
+             Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
   let Inst{12-10} = imm{5-3};
   let Inst{9-7}   = imm{8-6};
 }
@@ -535,7 +571,8 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> {
 // The all zeros pattern isn't a valid RISC-V instruction. It's used by GNU
 // binutils as 16-bit instruction known to be unimplemented (i.e., trapping).
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
-def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
+def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
+              Sched<[]> {
   let Inst{15-0} = 0;
 }
 
@@ -551,7 +588,7 @@ let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0,
 
 let rd = 0 in
 def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
-                            "c.nop", "$imm"> {
+                            "c.nop", "$imm">, Sched<[WriteNop]> {
   let Inst{6-2} = imm{4-0};
   let DecoderMethod = "decodeRVCInstrSImm";
 }
@@ -559,7 +596,8 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
 // Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
 def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
                                 (ins GPRX0:$rd, simm6nonzero:$imm),
-                                "c.addi", "$rd, $imm"> {
+                                "c.addi", "$rd, $imm">,
+                     Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = imm{4-0};
   let isAsmParserOnly = 1;
@@ -567,14 +605,16 @@ def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
 
 def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
                                       (ins GPRNoX0:$rd, immzero:$imm),
-                                      "c.addi", "$rd, $imm"> {
+                                      "c.addi", "$rd, $imm">,
+                           Sched<[WriteIALU, ReadIALU]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = 0;
   let isAsmParserOnly = 1;
 }
 
 def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
-                           "c.li", "$rd, $imm"> {
+                           "c.li", "$rd, $imm">,
+                Sched<[WriteIALU]> {
   let Inst{6-2} = imm{4-0};
   let Inst{11-7} = 0;
   let DecoderMethod = "decodeRVCInstrRdSImm";
@@ -582,14 +622,15 @@ def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
 
 def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
                             (ins c_lui_imm:$imm),
-                            "c.lui", "$rd, $imm"> {
+                            "c.lui", "$rd, $imm">,
+                 Sched<[WriteIALU]> {
   let Inst{6-2} = imm{4-0};
   let Inst{11-7} = 0;
   let DecoderMethod = "decodeRVCInstrRdSImm";
 }
 
 def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
-                           "c.mv", "$rs1, $rs2">
+                           "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>
 {
   let Inst{11-7} = 0;
   let DecoderMethod = "decodeRVCInstrRdRs2";
@@ -597,7 +638,8 @@ def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
 
 def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb),
                             (ins GPRX0:$rs1, GPRNoX0:$rs2),
-                            "c.add", "$rs1, $rs2"> {
+                            "c.add", "$rs1, $rs2">,
+                 Sched<[WriteIALU, ReadIALU, ReadIALU]> {
   let Constraints = "$rs1 = $rs1_wb";
   let Inst{11-7} = 0;
   let DecoderMethod = "decodeRVCInstrRdRs1Rs2";
@@ -605,7 +647,8 @@ def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb),
 
 def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
                              (ins GPRX0:$rd, uimmlog2xlennonzero:$imm),
-                             "c.slli" ,"$rd, $imm"> {
+                             "c.slli" ,"$rd, $imm">,
+                  Sched<[WriteShift, ReadShift]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = imm{4-0};
   let Inst{11-7} = 0;
@@ -613,7 +656,8 @@ def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb),
 }
 
 def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
-                               "c.slli64" ,"$rd"> {
+                               "c.slli64" ,"$rd">,
+                    Sched<[WriteShift, ReadShift]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = 0;
   let Inst{12} = 0;
@@ -621,7 +665,8 @@ def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd),
 
 def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
                                (ins GPRC:$rd),
-                               "c.srli64", "$rd"> {
+                               "c.srli64", "$rd">,
+                    Sched<[WriteShift, ReadShift]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = 0;
   let Inst{11-10} = 0;
@@ -630,7 +675,8 @@ def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
 
 def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
                                (ins GPRC:$rd),
-                               "c.srai64", "$rd"> {
+                               "c.srai64", "$rd">,
+                    Sched<[WriteShift, ReadShift]> {
   let Constraints = "$rd = $rd_wb";
   let Inst{6-2} = 0;
   let Inst{11-10} = 1;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index b5343e8a8309..4a036eb52bb8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -42,13 +42,15 @@ class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
     : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd),
-              (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+      Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPALUD_rr_frm<bits<7> funct7, string opcodestr>
     : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd),
                 (ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr,
-                 "$rd, $rs1, $rs2, $funct3">;
+                 "$rd, $rs1, $rs2, $funct3">,
+      Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>;
 
 class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
     : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
@@ -57,7 +59,8 @@ class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPCmpD_rr<bits<3> funct3, string opcodestr>
     : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd),
-              (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+      Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>;
 
 //===----------------------------------------------------------------------===//
 // Instructions
@@ -68,7 +71,8 @@ let Predicates = [HasStdExtD] in {
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
                   (ins GPR:$rs1, simm12:$imm12),
-                  "fld", "$rd, ${imm12}(${rs1})">;
+                  "fld", "$rd, ${imm12}(${rs1})">,
+          Sched<[WriteFLD64, ReadMemBase]>;
 
 // Operands for stores are in the order srcreg, base, offset rather than
 // reflecting the order these fields are specified in the instruction
@@ -76,15 +80,20 @@ def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
                   (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12),
-                   "fsd", "$rs2, ${imm12}(${rs1})">;
+                   "fsd", "$rs2, ${imm12}(${rs1})">,
+          Sched<[WriteFST64, ReadStoreData, ReadMemBase]>;
 
-def FMADD_D  : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">;
+def FMADD_D  : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">,
+               Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
 def          : FPFMADDynFrmAlias<FMADD_D, "fmadd.d">;
-def FMSUB_D  : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">;
+def FMSUB_D  : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">,
+               Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>;
 def          : FPFMADDynFrmAlias<FMSUB_D, "fmsub.d">;
-def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">;
+def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">,
+               Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>;
 def          : FPFMADDynFrmAlias<FNMSUB_D, "fnmsub.d">;
-def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">;
+def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">,
+               Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>;
 def          : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">;
 
 def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">;
@@ -96,7 +105,8 @@ def        : FPALUDDynFrmAlias<FMUL_D, "fmul.d">;
 def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">;
 def        : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">;
 
-def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d"> {
+def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">,
+              Sched<[WriteFSqrt64, ReadFSqrt64]> { // FPR64 operands
   let rs2 = 0b00000;
 }
 def         : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>;
@@ -107,12 +117,14 @@ def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">;
 def FMIN_D   : FPALUD_rr<0b0010101, 0b000, "fmin.d">;
 def FMAX_D   : FPALUD_rr<0b0010101, 0b001, "fmax.d">;
 
-def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d"> {
+def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">,
+               Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> {
   let rs2 = 0b00001;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>;
 
-def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s"> {
+def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">,
+               Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> {
   let rs2 = 0b00000;
 }
 
@@ -120,55 +132,66 @@ def FEQ_D : FPCmpD_rr<0b010, "feq.d">;
 def FLT_D : FPCmpD_rr<0b001, "flt.d">;
 def FLE_D : FPCmpD_rr<0b000, "fle.d">;
 
-def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d"> {
+def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">,
+               Sched<[WriteFClass64, ReadFClass64]> {
   let rs2 = 0b00000;
 }
 
-def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d"> {
+def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">,
+               Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
   let rs2 = 0b00000;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>;
 
-def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d"> {
+def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">,
+                Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> {
   let rs2 = 0b00001;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>;
 
-def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w"> {
+def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">,
+               Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
   let rs2 = 0b00000;
 }
 
-def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu"> {
+def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">,
+                Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> {
   let rs2 = 0b00001;
 }
 } // Predicates = [HasStdExtD]
 
 let Predicates = [HasStdExtD, IsRV64] in {
-def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d"> {
+def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">,
+               Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
   let rs2 = 0b00010;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>;
 
-def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d"> {
+def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">,
+                Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> {
   let rs2 = 0b00011;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>;
 
-def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d"> {
+def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">,
+              Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> {
   let rs2 = 0b00000;
 }
 
-def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l"> {
+def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">,
+               Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
   let rs2 = 0b00010;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>;
 
-def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu"> {
+def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">,
+                Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> {
   let rs2 = 0b00011;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>;
 
-def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> {
+def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">,
+              Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> {
   let rs2 = 0b00000;
 }
 } // Predicates = [HasStdExtD, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 3b73c865ea17..782c3f65af14 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -60,7 +60,8 @@ class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
     : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd),
-              (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+      Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPALUS_rr_frm<bits<7> funct7, string opcodestr>
@@ -93,7 +94,8 @@ class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr,
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class FPCmpS_rr<bits<3> funct3, string opcodestr>
     : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd),
-              (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+              (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+      Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>;
 
 //===----------------------------------------------------------------------===//
 // Instructions
@@ -103,7 +105,8 @@ let Predicates = [HasStdExtF] in {
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
                   (ins GPR:$rs1, simm12:$imm12),
-                   "flw", "$rd, ${imm12}(${rs1})">;
+                   "flw", "$rd, ${imm12}(${rs1})">,
+          Sched<[WriteFLD32, ReadMemBase]>;
 
 // Operands for stores are in the order srcreg, base, offset rather than
 // reflecting the order these fields are specified in the instruction
@@ -111,27 +114,37 @@ def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
                   (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12),
-                   "fsw", "$rs2, ${imm12}(${rs1})">;
+                   "fsw", "$rs2, ${imm12}(${rs1})">,
+          Sched<[WriteFST32, ReadStoreData, ReadMemBase]>;
 
-def FMADD_S  : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">;
+def FMADD_S  : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">,
+               Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
 def          : FPFMASDynFrmAlias<FMADD_S, "fmadd.s">;
-def FMSUB_S  : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">;
+def FMSUB_S  : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">,
+               Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>;
 def          : FPFMASDynFrmAlias<FMSUB_S, "fmsub.s">;
-def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">;
+def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">,
+               Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>;
 def          : FPFMASDynFrmAlias<FNMSUB_S, "fnmsub.s">;
-def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">;
+def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">,
+               Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>;
 def          : FPFMASDynFrmAlias<FNMADD_S, "fnmadd.s">;
 
-def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">;
+def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">,
+             Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
 def        : FPALUSDynFrmAlias<FADD_S, "fadd.s">;
-def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">;
+def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">,
+             Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>;
 def        : FPALUSDynFrmAlias<FSUB_S, "fsub.s">;
-def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">;
+def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">,
+             Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>;
 def        : FPALUSDynFrmAlias<FMUL_S, "fmul.s">;
-def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">;
+def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">,
+             Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>;
 def        : FPALUSDynFrmAlias<FDIV_S, "fdiv.s">;
 
-def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s"> {
+def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">,
+              Sched<[WriteFSqrt32, ReadFSqrt32]> {
   let rs2 = 0b00000;
 }
 def         : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>;
@@ -142,17 +155,20 @@ def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">;
 def FMIN_S   : FPALUS_rr<0b0010100, 0b000, "fmin.s">;
 def FMAX_S   : FPALUS_rr<0b0010100, 0b001, "fmax.s">;
 
-def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s"> {
+def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">,
+               Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
   let rs2 = 0b00000;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>;
 
-def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s"> {
+def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">,
+                Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> {
   let rs2 = 0b00001;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>;
 
-def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w"> {
+def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">,
+              Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> {
   let rs2 = 0b00000;
 }
 
@@ -160,42 +176,50 @@ def FEQ_S : FPCmpS_rr<0b010, "feq.s">;
 def FLT_S : FPCmpS_rr<0b001, "flt.s">;
 def FLE_S : FPCmpS_rr<0b000, "fle.s">;
 
-def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s"> {
+def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">,
+               Sched<[WriteFClass32, ReadFClass32]> {
   let rs2 = 0b00000;
 }
 
-def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w"> {
+def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">,
+               Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
   let rs2 = 0b00000;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>;
 
-def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu"> {
+def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">,
+                Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> {
   let rs2 = 0b00001;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>;
 
-def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x"> {
+def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">,
+              Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> {
   let rs2 = 0b00000;
 }
 } // Predicates = [HasStdExtF]
 
 let Predicates = [HasStdExtF, IsRV64] in {
-def FCVT_L_S  : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s"> {
+def FCVT_L_S  : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">,
+                Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
   let rs2 = 0b00010;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>;
 
-def FCVT_LU_S  : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s"> {
+def FCVT_LU_S  : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">,
+                 Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> {
   let rs2 = 0b00011;
 }
 def            : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>;
 
-def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l"> {
+def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">,
+               Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
   let rs2 = 0b00010;
 }
 def          : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>;
 
-def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu"> {
+def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">,
+                Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> {
   let rs2 = 0b00011;
 }
 def           : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index e75151ba99c7..987534aadd79 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -24,22 +24,35 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtM] in {
-def MUL     : ALU_rr<0b0000001, 0b000, "mul">;
-def MULH    : ALU_rr<0b0000001, 0b001, "mulh">;
-def MULHSU  : ALU_rr<0b0000001, 0b010, "mulhsu">;
-def MULHU   : ALU_rr<0b0000001, 0b011, "mulhu">;
-def DIV     : ALU_rr<0b0000001, 0b100, "div">;
-def DIVU    : ALU_rr<0b0000001, 0b101, "divu">;
-def REM     : ALU_rr<0b0000001, 0b110, "rem">;
-def REMU    : ALU_rr<0b0000001, 0b111, "remu">;
+def MUL     : ALU_rr<0b0000001, 0b000, "mul">,
+              Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULH    : ALU_rr<0b0000001, 0b001, "mulh">,
+              Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULHSU  : ALU_rr<0b0000001, 0b010, "mulhsu">,
+              Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def MULHU   : ALU_rr<0b0000001, 0b011, "mulhu">,
+              Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+def DIV     : ALU_rr<0b0000001, 0b100, "div">,
+              Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def DIVU    : ALU_rr<0b0000001, 0b101, "divu">,
+              Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def REM     : ALU_rr<0b0000001, 0b110, "rem">,
+              Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
+def REMU    : ALU_rr<0b0000001, 0b111, "remu">,
+              Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
 } // Predicates = [HasStdExtM]
 
 let Predicates = [HasStdExtM, IsRV64] in {
-def MULW    : ALUW_rr<0b0000001, 0b000, "mulw">;
-def DIVW    : ALUW_rr<0b0000001, 0b100, "divw">;
-def DIVUW   : ALUW_rr<0b0000001, 0b101, "divuw">;
-def REMW    : ALUW_rr<0b0000001, 0b110, "remw">;
-def REMUW   : ALUW_rr<0b0000001, 0b111, "remuw">;
+def MULW    : ALUW_rr<0b0000001, 0b000, "mulw">,
+              Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>;
+def DIVW    : ALUW_rr<0b0000001, 0b100, "divw">,
+              Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def DIVUW   : ALUW_rr<0b0000001, 0b101, "divuw">,
+              Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def REMW    : ALUW_rr<0b0000001, 0b110, "remw">,
+              Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
+def REMUW   : ALUW_rr<0b0000001, 0b111, "remuw">,
+              Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
 } // Predicates = [HasStdExtM, IsRV64]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
new file mode 100644
index 000000000000..8a91a70b61c7
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td
@@ -0,0 +1,213 @@
+//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Rocket machine model for scheduling and other instruction cost heuristics.
+def Rocket32Model : SchedMachineModel {
+  let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
+  let IssueWidth = 1;        // One micro-op is dispatched per cycle.
+  let LoadLatency = 3;
+  let MispredictPenalty = 3;
+  let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
+// Rocket is in-order.
+
+let BufferSize = 0 in {
+def Rocket32UnitALU        : ProcResource<1>; // Int ALU
+def Rocket32UnitIMul       : ProcResource<1>; // Int Multiply
+def Rocket32UnitMem        : ProcResource<1>; // Load/Store
+def Rocket32UnitB          : ProcResource<1>; // Branch
+
+def Rocket32UnitFPALU      : ProcResource<1>; // FP ALU
+}
+
+let BufferSize = 1 in {
+def Rocket32UnitIDiv       : ProcResource<1>; // Int Division
+def Rocket32UnitFPDivSqrt  : ProcResource<1>; // FP Divide/Sqrt
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = Rocket32Model in {
+
+def : WriteRes<WriteJmp, [Rocket32UnitB]>;
+def : WriteRes<WriteJal, [Rocket32UnitB]>;
+def : WriteRes<WriteJalr, [Rocket32UnitB]>;
+def : WriteRes<WriteJmpReg, [Rocket32UnitB]>;
+
+def : WriteRes<WriteIALU, [Rocket32UnitALU]>;
+def : WriteRes<WriteShift, [Rocket32UnitALU]>;
+
+// Multiplies on Rocket differ by implementation; placeholder until
+// we can determine how to read from command line
+def : WriteRes<WriteIMul, [Rocket32UnitIMul]> { let Latency = 4; }
+
+// 32-bit divides have a worst-case latency of 34 cycles
+def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> {
+  let Latency = 34;
+  let ResourceCycles = [34];
+}
+
+// Memory
+def : WriteRes<WriteSTB, [Rocket32UnitMem]>;
+def : WriteRes<WriteSTH, [Rocket32UnitMem]>;
+def : WriteRes<WriteSTW, [Rocket32UnitMem]>;
+def : WriteRes<WriteFST32, [Rocket32UnitMem]>;
+def : WriteRes<WriteFST64, [Rocket32UnitMem]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [Rocket32UnitMem]>;
+def : WriteRes<WriteLDH, [Rocket32UnitMem]>;
+def : WriteRes<WriteCSR, [Rocket32UnitALU]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteLDW, [Rocket32UnitMem]>;
+def : WriteRes<WriteFLD32, [Rocket32UnitMem]>;
+def : WriteRes<WriteFLD64, [Rocket32UnitMem]>;
+
+def : WriteRes<WriteAtomicW, [Rocket32UnitMem]>;
+def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>;
+}
+
+def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>;
+
+// Most FP single precision operations are 4 cycles
+def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]> { let Latency = 4; }
+
+// Most FP double precision operations are 6 cycles
+def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]> { let Latency = 6; }
+
+let Latency = 2 in {
+def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>;
+
+def : WriteRes<WriteFClass32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFClass64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCmp32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFCmp64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMovF32ToI32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMovI32ToF32, [Rocket32UnitFPALU]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMul32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulAdd32, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulSub32, [Rocket32UnitFPALU]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMul64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulAdd64, [Rocket32UnitFPALU]>;
+def : WriteRes<WriteFMulSub64, [Rocket32UnitFPALU]>;
+}
+
+// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
+let Latency = 20, ResourceCycles = [20] in {
+def : WriteRes<WriteFDiv32, [Rocket32UnitFPDivSqrt]>;
+def : WriteRes<WriteFDiv64, [Rocket32UnitFPDivSqrt]>;
+}
+
+// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
+def : WriteRes<WriteFSqrt32, [Rocket32UnitFPDivSqrt]> { let Latency = 20;
+                                                        let ResourceCycles = [20];}
+def : WriteRes<WriteFSqrt64, [Rocket32UnitFPDivSqrt]> { let Latency = 25;
+                                                        let ResourceCycles = [25];}
+
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+let Unsupported = 1 in {
+def : WriteRes<WriteIALU32, []>;
+def : WriteRes<WriteShift32, []>;
+def : WriteRes<WriteIMul32, []>;
+def : WriteRes<WriteIDiv32, []>;
+def : WriteRes<WriteSTD, []>;
+def : WriteRes<WriteLDWU, []>;
+def : WriteRes<WriteLDD, []>;
+def : WriteRes<WriteAtomicD, []>;
+def : WriteRes<WriteAtomicLDD, []>;
+def : WriteRes<WriteAtomicSTD, []>;
+def : WriteRes<WriteFCvtI64ToF32, []>;
+def : WriteRes<WriteFCvtI64ToF64, []>;
+def : WriteRes<WriteFCvtF64ToI64, []>;
+def : WriteRes<WriteFCvtF32ToI64, []>;
+def : WriteRes<WriteFMovI64ToF64, []>;
+def : WriteRes<WriteFMovF64ToI64, []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types with cycles.
+// Dummy definitions for RocketCore.
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
new file mode 100644
index 000000000000..79e79f90f2f0
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td
@@ -0,0 +1,214 @@
+//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedule.h for details.
+
+// Rocket machine model for scheduling and other instruction cost heuristics.
+def Rocket64Model : SchedMachineModel {
+  let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order.
+  let IssueWidth = 1;        // One micro-op is dispatched per cycle.
+  let LoadLatency = 3;
+  let MispredictPenalty = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
+// Rocket is in-order.
+
+let BufferSize = 0 in {
+def Rocket64UnitALU        : ProcResource<1>; // Int ALU
+def Rocket64UnitIMul       : ProcResource<1>; // Int Multiply
+def Rocket64UnitMem        : ProcResource<1>; // Load/Store
+def Rocket64UnitB          : ProcResource<1>; // Branch
+
+def Rocket64UnitFPALU      : ProcResource<1>; // FP ALU
+}
+
+let BufferSize = 1 in {
+def Rocket64UnitIDiv       : ProcResource<1>; // Int Division
+def Rocket64UnitFPDivSqrt  : ProcResource<1>; // FP Divide/Sqrt
+}
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = Rocket64Model in {
+
+def : WriteRes<WriteJmp, [Rocket64UnitB]>;
+def : WriteRes<WriteJal, [Rocket64UnitB]>;
+def : WriteRes<WriteJalr, [Rocket64UnitB]>;
+def : WriteRes<WriteJmpReg, [Rocket64UnitB]>;
+
+def : WriteRes<WriteIALU32, [Rocket64UnitALU]>;
+def : WriteRes<WriteIALU, [Rocket64UnitALU]>;
+def : WriteRes<WriteShift32, [Rocket64UnitALU]>;
+def : WriteRes<WriteShift, [Rocket64UnitALU]>;
+
+let Latency = 4 in {
+def : WriteRes<WriteIMul, [Rocket64UnitIMul]>;
+def : WriteRes<WriteIMul32, [Rocket64UnitIMul]>;
+}
+
+// Integer divide varies based on operand magnitude and sign; worst-case latency is 34.
+def : WriteRes<WriteIDiv32, [Rocket64UnitIDiv]> {
+  let Latency = 34;
+  let ResourceCycles = [34];
+}
+def : WriteRes<WriteIDiv, [Rocket64UnitIDiv]> {
+  let Latency = 33;
+  let ResourceCycles = [33];
+}
+
+// Memory
+def : WriteRes<WriteSTB, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTH, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTW, [Rocket64UnitMem]>;
+def : WriteRes<WriteSTD, [Rocket64UnitMem]>;
+def : WriteRes<WriteFST32, [Rocket64UnitMem]>;
+def : WriteRes<WriteFST64, [Rocket64UnitMem]>;
+
+let Latency = 3 in {
+def : WriteRes<WriteLDB, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDH, [Rocket64UnitMem]>;
+def : WriteRes<WriteCSR, [Rocket64UnitALU]>;
+}
+
+let Latency = 2 in {
+def : WriteRes<WriteLDW, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDWU, [Rocket64UnitMem]>;
+def : WriteRes<WriteLDD, [Rocket64UnitMem]>;
+def : WriteRes<WriteFLD32, [Rocket64UnitMem]>;
+def : WriteRes<WriteFLD64, [Rocket64UnitMem]>;
+
+def : WriteRes<WriteAtomicW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicD, [Rocket64UnitMem]>;
+
+def : WriteRes<WriteAtomicLDW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicLDD, [Rocket64UnitMem]>;
+}
+
+def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>;
+def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>;
+
+// Most FP single precision operations are 4 cycles
+def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]> { let Latency = 4; }
+
+// Most FP double precision operations are 6 cycles
+def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]> { let Latency = 6; }
+
+// Conversion, classify, compare and move instructions (Rocket64 FP ALU)
+let Latency = 2 in {
+def : WriteRes<WriteFCvtI32ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI32ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtI64ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF32ToF64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCvtF64ToF32, [Rocket64UnitFPALU]>;
+
+def : WriteRes<WriteFClass32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFClass64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCmp32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFCmp64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovF32ToI32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovI32ToF32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovF64ToI64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMovI64ToF64, [Rocket64UnitFPALU]>;
+}
+
+let Latency = 5 in {
+def : WriteRes<WriteFMul32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulAdd32, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulSub32, [Rocket64UnitFPALU]>;
+}
+
+let Latency = 7 in {
+def : WriteRes<WriteFMul64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulAdd64, [Rocket64UnitFPALU]>;
+def : WriteRes<WriteFMulSub64, [Rocket64UnitFPALU]>;
+}
+
+// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency
+let Latency = 20, ResourceCycles = [20] in {
+def : WriteRes<WriteFDiv32, [Rocket64UnitFPDivSqrt]>;
+def : WriteRes<WriteFDiv64, [Rocket64UnitFPDivSqrt]>;
+}
+
+// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency
+def : WriteRes<WriteFSqrt32, [Rocket64UnitFPDivSqrt]> { let Latency = 20;
+                                                        let ResourceCycles = [20]; }
+def : WriteRes<WriteFSqrt64, [Rocket64UnitFPDivSqrt]> { let Latency = 25;
+                                                        let ResourceCycles = [25]; }
+
+def : WriteRes<WriteNop, []>;
+
+def : InstRW<[WriteIALU], (instrs COPY)>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types with cycles.
+// Dummy definitions for RocketCore.
+def : ReadAdvance<ReadJmp, 0>;
+def : ReadAdvance<ReadJalr, 0>;
+def : ReadAdvance<ReadCSR, 0>;
+def : ReadAdvance<ReadStoreData, 0>;
+def : ReadAdvance<ReadMemBase, 0>;
+def : ReadAdvance<ReadIALU, 0>;
+def : ReadAdvance<ReadIALU32, 0>;
+def : ReadAdvance<ReadShift, 0>;
+def : ReadAdvance<ReadShift32, 0>;
+def : ReadAdvance<ReadIDiv, 0>;
+def : ReadAdvance<ReadIDiv32, 0>;
+def : ReadAdvance<ReadIMul, 0>;
+def : ReadAdvance<ReadIMul32, 0>;
+def : ReadAdvance<ReadAtomicWA, 0>;
+def : ReadAdvance<ReadAtomicWD, 0>;
+def : ReadAdvance<ReadAtomicDA, 0>;
+def : ReadAdvance<ReadAtomicDD, 0>;
+def : ReadAdvance<ReadAtomicLDW, 0>;
+def : ReadAdvance<ReadAtomicLDD, 0>;
+def : ReadAdvance<ReadAtomicSTW, 0>;
+def : ReadAdvance<ReadAtomicSTD, 0>;
+def : ReadAdvance<ReadFALU32, 0>;
+def : ReadAdvance<ReadFALU64, 0>;
+def : ReadAdvance<ReadFMul32, 0>;
+def : ReadAdvance<ReadFMulAdd32, 0>;
+def : ReadAdvance<ReadFMulSub32, 0>;
+def : ReadAdvance<ReadFMul64, 0>;
+def : ReadAdvance<ReadFMulAdd64, 0>;
+def : ReadAdvance<ReadFMulSub64, 0>;
+def : ReadAdvance<ReadFDiv32, 0>;
+def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt32, 0>;
+def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp32, 0>;
+def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFCvtF32ToI32, 0>;
+def : ReadAdvance<ReadFCvtF32ToI64, 0>;
+def : ReadAdvance<ReadFCvtF64ToI32, 0>;
+def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF32, 0>;
+def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF32, 0>;
+def : ReadAdvance<ReadFCvtI64ToF64, 0>;
+def : ReadAdvance<ReadFCvtF32ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFMovF32ToI32, 0>;
+def : ReadAdvance<ReadFMovI32ToF32, 0>;
+def : ReadAdvance<ReadFMovF64ToI64, 0>;
+def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass32, 0>;
+def : ReadAdvance<ReadFClass64, 0>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
new file mode 100644
index 000000000000..9e2762a5d171
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -0,0 +1,138 @@
+//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// Define scheduler resources associated with def operands.
+def WriteIALU       : SchedWrite;    // 32 or 64-bit integer ALU operations
+def WriteIALU32     : SchedWrite;    // 32-bit integer ALU operations on RV64I
+def WriteShift32    : SchedWrite;    // 32-bit shift operations on RV64Ix
+def WriteShift      : SchedWrite;    // 32 or 64-bit shift operations
+def WriteIDiv       : SchedWrite;    // 32-bit or 64-bit divide and remainder
+def WriteIDiv32     : SchedWrite;    // 32-bit divide and remainder on RV64I
+def WriteIMul       : SchedWrite;    // 32-bit or 64-bit multiply
+def WriteIMul32     : SchedWrite;    // 32-bit multiply on RV64I
+def WriteJmp        : SchedWrite;    // Jump
+def WriteJal        : SchedWrite;    // Jump and link
+def WriteJalr       : SchedWrite;    // Jump and link register
+def WriteJmpReg     : SchedWrite;    // Jump register
+def WriteNop        : SchedWrite;
+def WriteLDB        : SchedWrite;    // Load byte
+def WriteLDH        : SchedWrite;    // Load half-word
+def WriteLDW        : SchedWrite;    // Load word
+def WriteLDWU       : SchedWrite;    // Load word unsigned
+def WriteLDD        : SchedWrite;    // Load double-word
+def WriteCSR        : SchedWrite;    // CSR instructions
+def WriteSTB        : SchedWrite;    // Store byte
+def WriteSTH        : SchedWrite;    // Store half-word
+def WriteSTW        : SchedWrite;    // Store word
+def WriteSTD        : SchedWrite;    // Store double-word
+def WriteAtomicW    : SchedWrite;    // Atomic memory operation word size
+def WriteAtomicD    : SchedWrite;    // Atomic memory operation double word size
+def WriteAtomicLDW  : SchedWrite;    // Atomic load word
+def WriteAtomicLDD  : SchedWrite;    // Atomic load double word
+def WriteAtomicSTW  : SchedWrite;    // Atomic store word
+def WriteAtomicSTD  : SchedWrite;    // Atomic store double word
+def WriteFALU32     : SchedWrite;    // FP 32-bit computation
+def WriteFALU64     : SchedWrite;    // FP 64-bit computation
+def WriteFMul32     : SchedWrite;    // 32-bit floating point multiply
+def WriteFMulAdd32  : SchedWrite;    // 32-bit floating point multiply add
+def WriteFMulSub32  : SchedWrite;    // 32-bit floating point multiply sub
+def WriteFMul64     : SchedWrite;    // 64-bit floating point multiply
+def WriteFMulAdd64  : SchedWrite;    // 64-bit floating point multiply add
+def WriteFMulSub64  : SchedWrite;    // 64-bit floating point multiply sub
+def WriteFDiv32     : SchedWrite;    // 32-bit floating point divide
+def WriteFDiv64     : SchedWrite;    // 64-bit floating point divide
+def WriteFSqrt32    : SchedWrite;    // 32-bit floating point sqrt
+def WriteFSqrt64    : SchedWrite;    // 64-bit floating point sqrt
+
+// Integer to float conversions
+def WriteFCvtI32ToF32  : SchedWrite;
+def WriteFCvtI32ToF64  : SchedWrite;
+def WriteFCvtI64ToF32  : SchedWrite;    // RV64I only
+def WriteFCvtI64ToF64  : SchedWrite;    // RV64I only
+
+// Float to integer conversions
+def WriteFCvtF32ToI32  : SchedWrite;
+def WriteFCvtF32ToI64  : SchedWrite;    // RV64I only
+def WriteFCvtF64ToI32  : SchedWrite;
+def WriteFCvtF64ToI64  : SchedWrite;    // RV64I only
+
+// Float to float conversions
+def WriteFCvtF32ToF64  : SchedWrite;
+def WriteFCvtF64ToF32  : SchedWrite;
+
+def WriteFConv32    : SchedWrite;    // 32-bit floating point convert
+def WriteFConv64    : SchedWrite;    // 64-bit floating point convert
+def WriteFClass32   : SchedWrite;    // 32-bit floating point classify
+def WriteFClass64   : SchedWrite;    // 64-bit floating point classify
+def WriteFCmp32     : SchedWrite;    // 32-bit floating point compare
+def WriteFCmp64     : SchedWrite;    // 64-bit floating point compare
+
+def WriteFMovF32ToI32     : SchedWrite;
+def WriteFMovI32ToF32     : SchedWrite;
+def WriteFMovF64ToI64     : SchedWrite;    // RV64I only
+def WriteFMovI64ToF64     : SchedWrite;    // RV64I only
+
+def WriteFMov32       : SchedWrite;    // 32-bit floating point move
+def WriteFMov64       : SchedWrite;    // 64-bit floating point move
+def WriteFLD32        : SchedWrite;    // Floating point sp load
+def WriteFLD64        : SchedWrite;    // Floating point dp load
+def WriteFST32        : SchedWrite;    // Floating point sp store
+def WriteFST64        : SchedWrite;    // Floating point dp store
+
+/// Define scheduler resources associated with use operands.
+def ReadJmp         : SchedRead;
+def ReadJalr        : SchedRead;
+def ReadCSR         : SchedRead;
+def ReadMemBase     : SchedRead;
+def ReadStoreData   : SchedRead;
+def ReadIALU        : SchedRead;
+def ReadIALU32      : SchedRead;    // 32-bit integer ALU operations on RV64I
+def ReadShift       : SchedRead;
+def ReadShift32     : SchedRead;    // 32-bit shift operations on RV64Ix
+def ReadIDiv        : SchedRead;
+def ReadIDiv32      : SchedRead;
+def ReadIMul        : SchedRead;
+def ReadIMul32      : SchedRead;
+def ReadAtomicWA    : SchedRead;
+def ReadAtomicWD    : SchedRead;
+def ReadAtomicDA    : SchedRead;
+def ReadAtomicDD    : SchedRead;
+def ReadAtomicLDW   : SchedRead;    // Atomic load word
+def ReadAtomicLDD   : SchedRead;    // Atomic load double word
+def ReadAtomicSTW   : SchedRead;    // Atomic store word
+def ReadAtomicSTD   : SchedRead;    // Atomic store double word
+def ReadFALU32      : SchedRead;    // FP 32-bit computation
+def ReadFALU64      : SchedRead;    // FP 64-bit computation
+def ReadFMul32      : SchedRead;    // 32-bit floating point multiply
+def ReadFMulAdd32   : SchedRead;    // 32-bit floating point multiply add
+def ReadFMulSub32   : SchedRead;    // 32-bit floating point multiply sub
+def ReadFMul64      : SchedRead;    // 64-bit floating point multiply
+def ReadFMulAdd64   : SchedRead;    // 64-bit floating point multiply add
+def ReadFMulSub64   : SchedRead;    // 64-bit floating point multiply sub
+def ReadFDiv32      : SchedRead;    // 32-bit floating point divide
+def ReadFDiv64      : SchedRead;    // 64-bit floating point divide
+def ReadFSqrt32     : SchedRead;    // 32-bit floating point sqrt
+def ReadFSqrt64     : SchedRead;    // 64-bit floating point sqrt
+def ReadFCmp32      : SchedRead;
+def ReadFCmp64      : SchedRead;
+def ReadFCvtF32ToI32     : SchedRead;
+def ReadFCvtF32ToI64     : SchedRead;
+def ReadFCvtF64ToI32     : SchedRead;
+def ReadFCvtF64ToI64     : SchedRead;
+def ReadFCvtI32ToF32     : SchedRead;
+def ReadFCvtI32ToF64     : SchedRead;
+def ReadFCvtI64ToF32     : SchedRead;
+def ReadFCvtI64ToF64     : SchedRead;
+def ReadFMovF32ToI32     : SchedRead;
+def ReadFMovI32ToF32     : SchedRead;
+def ReadFMovF64ToI64     : SchedRead;
+def ReadFMovI64ToF64     : SchedRead;
+def ReadFCvtF32ToF64     : SchedRead;
+def ReadFCvtF64ToF32     : SchedRead;
+def ReadFClass32         : SchedRead;
+def ReadFClass64         : SchedRead;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2bb26988c7da..de71c01753de 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
+#include "Utils/RISCVBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@@ -89,8 +90,17 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
     // creation will depend on the TM and the code generation flags on the
     // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS,
-                                         Options.MCOptions.getABIName(), *this);
+    auto ABIName = Options.MCOptions.getABIName();
+    if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>(
+            F.getParent()->getModuleFlag("target-abi"))) {
+      auto TargetABI = RISCVABI::getTargetABI(ABIName);
+      if (TargetABI != RISCVABI::ABI_Unknown &&
+          ModuleTargetABI->getString() != ABIName) {
+        report_fatal_error("-target-abi option != target-abi module flag");
+      }
+      ABIName = ModuleTargetABI->getString();
+    }
+    I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, ABIName, *this);
   }
   return I.get();
 }
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
index 432ebb294d46..43b1f8b80c5f 100644
--- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -12,16 +12,7 @@ namespace RISCVSysReg {
 namespace RISCVABI {
 ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
                      StringRef ABIName) {
-  auto TargetABI = StringSwitch<ABI>(ABIName)
-                       .Case("ilp32", ABI_ILP32)
-                       .Case("ilp32f", ABI_ILP32F)
-                       .Case("ilp32d", ABI_ILP32D)
-                       .Case("ilp32e", ABI_ILP32E)
-                       .Case("lp64", ABI_LP64)
-                       .Case("lp64f", ABI_LP64F)
-                       .Case("lp64d", ABI_LP64D)
-                       .Default(ABI_Unknown);
-
+  auto TargetABI = getTargetABI(ABIName);
   bool IsRV64 = TT.isArch64Bit();
   bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
 
@@ -58,6 +49,19 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
   return ABI_ILP32;
 }
 
+// Translate an ABI name into its ABI enumerator (ABI_Unknown if unrecognised).
+ABI getTargetABI(StringRef ABIName) {
+  return StringSwitch<ABI>(ABIName)
+      .Case("ilp32", ABI_ILP32)
+      .Case("ilp32f", ABI_ILP32F)
+      .Case("ilp32d", ABI_ILP32D)
+      .Case("ilp32e", ABI_ILP32E)
+      .Case("lp64", ABI_LP64)
+      .Case("lp64f", ABI_LP64F)
+      .Case("lp64d", ABI_LP64D)
+      .Default(ABI_Unknown);
+}
+
 // To avoid the BP value clobbered by a function call, we need to choose a
 // callee saved register to save the value. RV32E only has X8 and X9 as callee
 // saved registers and X8 will be used as fp. So we choose X9 as bp.
diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index cf078df9609a..d36c528bba1e 100644
--- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -202,6 +202,8 @@ enum ABI {
 ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
                      StringRef ABIName);
 
+ABI getTargetABI(StringRef ABIName);
+
 // Returns the register used to hold the stack pointer after realignment.
 Register getBPReg();
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73905d3357a..ab00069497af 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -6859,8 +6859,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   for (MachineBasicBlock::iterator NextMIIt =
          std::next(MachineBasicBlock::iterator(MI));
        NextMIIt != MBB->end(); ++NextMIIt) {
-    if (NextMIIt->definesRegister(SystemZ::CC))
-      break;
     if (isSelectPseudo(*NextMIIt)) {
       assert(NextMIIt->getOperand(3).getImm() == CCValid &&
              "Bad CCValid operands since CC was not redefined.");
@@ -6871,6 +6869,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
       }
       break;
     }
+    if (NextMIIt->definesRegister(SystemZ::CC) ||
+        NextMIIt->usesCustomInsertionHook())
+      break;
     bool User = false;
     for (auto SelMI : Selects)
       if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index d1f3acbd221e..3e905c18fa3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -751,6 +751,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
     auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
     if (!II)
       continue;
+    Changed = true;
     LandingPads.insert(II->getLandingPadInst());
     IRB.SetInsertPoint(II);
 
@@ -791,6 +792,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
       auto *RI = dyn_cast<ResumeInst>(&I);
       if (!RI)
         continue;
+      Changed = true;
 
       // Split the input into legal values
       Value *Input = RI->getValue();
@@ -815,6 +817,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
         continue;
       if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for)
         continue;
+      Changed = true;
 
       IRB.SetInsertPoint(CI);
       CallInst *NewCI =
@@ -830,7 +833,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
     if (auto *LPI = dyn_cast<LandingPadInst>(I))
       LandingPads.insert(LPI);
   }
-  Changed = !LandingPads.empty();
+  Changed |= !LandingPads.empty();
 
   // Handle all the landingpad for this function together, as multiple invokes
   // may share a single lp
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index dffda5217675..2284cd7a70b8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -85,13 +85,13 @@ cl::opt<unsigned> X86AlignBranchBoundary(
 
 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
     "x86-align-branch",
-    cl::desc("Specify types of branches to align (plus separated list of "
-             "types). The branches's types are combination of jcc, fused, "
-             "jmp, call, ret, indirect."),
-    cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
-                   "conditional jumps, jmp indicates unconditional jumps, call "
-                   "indicates direct and indirect calls, ret indicates rets, "
-                   "indirect indicates indirect jumps."),
+    cl::desc(
+        "Specify types of branches to align. The branches's types are "
+        "combination of jcc, fused, jmp, call, ret, indirect. jcc indicates "
+        "conditional jumps, fused indicates fused conditional jumps, jmp "
+        "indicates unconditional jumps, call indicates direct and indirect "
+        "calls, ret indicates rets, indirect indicates indirect jumps."),
+    cl::value_desc("(plus separated list of types)"),
     cl::location(X86AlignBranchKindLoc));
 
 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f152968ddfd..cbdd7135de43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21056,7 +21056,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 
   // Divide by pow2.
   SDValue SRA =
-      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64));
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
 
   // If we're dividing by a positive value, we're done.  Otherwise, we must
   // negate the result.
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 2fc9a2af01d7..7f49c6e861d4 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2002,6 +2002,25 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     break;
   }
 
+  case X86::ENDBR32:
+  case X86::ENDBR64: {
+    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
+    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
+    // non-empty. If MI is the initial ENDBR, place the
+    // __patchable_function_entries label after ENDBR.
+    if (CurrentPatchableFunctionEntrySym &&
+        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
+        MI == &MF->front().front()) {
+      MCInst Inst;
+      MCInstLowering.Lower(MI, Inst);
+      EmitAndCountInstruction(Inst);
+      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
+      OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
+      return;
+    }
+    break;
+  }
+
   case X86::TAILJMPr:
   case X86::TAILJMPm:
   case X86::TAILJMPd:
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9c992830879a..7cfc29f7bf7a 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
@@ -187,8 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
 }
 
 /// Set of global extensions, automatically added as part of the standard set.
-static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
-   PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
+static ManagedStatic<
+    SmallVector<std::tuple<PassManagerBuilder::ExtensionPointTy,
+                           PassManagerBuilder::ExtensionFn,
+                           PassManagerBuilder::GlobalExtensionID>,
+                8>>
+    GlobalExtensions;
+static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter;
 
 /// Check if GlobalExtensions is constructed and not empty.
 /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
@@ -197,10 +203,29 @@ static bool GlobalExtensionsNotEmpty() {
   return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
 }
 
-void PassManagerBuilder::addGlobalExtension(
-    PassManagerBuilder::ExtensionPointTy Ty,
-    PassManagerBuilder::ExtensionFn Fn) {
-  GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn)));
+PassManagerBuilder::GlobalExtensionID
+PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty,
+                                       PassManagerBuilder::ExtensionFn Fn) {
+  auto ExtensionID = GlobalExtensionsCounter++;
+  GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID));
+  return ExtensionID;
+}
+
+void PassManagerBuilder::removeGlobalExtension(
+    PassManagerBuilder::GlobalExtensionID ExtensionID) {
+  // RegisterStandardPasses may try to call this function after GlobalExtensions
+  // has already been destroyed; doing so should not generate an error.
+  if (!GlobalExtensions.isConstructed())
+    return;
+
+  auto GlobalExtension =
+      llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) {
+        return std::get<2>(elem) == ExtensionID;
+      });
+  assert(GlobalExtension != GlobalExtensions->end() &&
+         "The extension ID to be removed should always be valid.");
+
+  GlobalExtensions->erase(GlobalExtension);
 }
 
 void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
@@ -211,8 +236,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
                                            legacy::PassManagerBase &PM) const {
   if (GlobalExtensionsNotEmpty()) {
     for (auto &Ext : *GlobalExtensions) {
-      if (Ext.first == ETy)
-        Ext.second(*this, PM);
+      if (std::get<0>(Ext) == ETy)
+        std::get<1>(Ext)(*this, PM);
     }
   }
   for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c288a7d8d403..74654f7ef51d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1336,6 +1336,11 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
   if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
     return false;
 
+  // Make sure the type would actually change.
+  // This condition can be hit with chains of bitcasts.
+  if (LI->getType() == CmpLoadTy)
+    return false;
+
   // Make sure we're not changing the size of the load/store.
   const auto &DL = IC.getDataLayout();
   if (DL.getTypeStoreSizeInBits(LI->getType()) !=
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 05a624fde86b..49645e9460cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1013,6 +1013,12 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
       Cmp.getPredicate() == CanonicalPred)
     return nullptr;
 
+  // Bail out on unsimplified X-0 operand (due to some worklist management bug),
+  // as this may cause an infinite combine loop. Let the sub be folded first.
+  if (match(LHS, m_Sub(m_Value(), m_Zero())) ||
+      match(RHS, m_Sub(m_Value(), m_Zero())))
+    return nullptr;
+
   // Create the canonical compare and plug it into the select.
   Sel.setCondition(Builder.CreateICmp(CanonicalPred, LHS, RHS));
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 801c09a317a7..bf32996d96e2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3568,7 +3568,8 @@ static bool combineInstructionsOverFunction(
     ProfileSummaryInfo *PSI, bool ExpensiveCombines, unsigned MaxIterations,
     LoopInfo *LI) {
   auto &DL = F.getParent()->getDataLayout();
-  ExpensiveCombines |= EnableExpensiveCombines;
+  if (EnableExpensiveCombines.getNumOccurrences())
+    ExpensiveCombines = EnableExpensiveCombines;
   MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
 
   /// Builder - This is an IRBuilder that automatically inserts new
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 80acab307578..f581142df8f7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3005,6 +3005,43 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  Constant *getPclmulMask(IRBuilder<> &IRB, unsigned Width, bool OddElements) {
+    SmallVector<Constant *, 8> Mask;
+    for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
+      Constant *C = ConstantInt::get(IRB.getInt32Ty(), X);
+      Mask.push_back(C);
+      Mask.push_back(C);
+    }
+    return ConstantVector::get(Mask);
+  }
+
+  // Instrument pclmul intrinsics.
+  // These intrinsics operate either on odd or on even elements of the input
+  // vectors, depending on the constant in the 3rd argument, ignoring the rest.
+  // Replace the unused elements with copies of the used ones, ex:
+  //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
+  // or
+  //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
+  // and then apply the usual shadow combining logic.
+  void handlePclmulIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Type *ShadowTy = getShadowTy(&I);
+    unsigned Width = I.getArgOperand(0)->getType()->getVectorNumElements();
+    assert(isa<ConstantInt>(I.getArgOperand(2)) &&
+           "pclmul 3rd operand must be a constant");
+    unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+    Value *Shuf0 =
+        IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
+                                getPclmulMask(IRB, Width, Imm & 0x01));
+    Value *Shuf1 =
+        IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
+                                getPclmulMask(IRB, Width, Imm & 0x10));
+    ShadowAndOriginCombiner SOC(this, IRB);
+    SOC.Add(Shuf0, getOrigin(&I, 0));
+    SOC.Add(Shuf1, getOrigin(&I, 1));
+    SOC.Done(&I);
+  }
+
   void visitIntrinsicInst(IntrinsicInst &I) {
     switch (I.getIntrinsicID()) {
     case Intrinsic::lifetime_start:
@@ -3238,6 +3275,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleBmiIntrinsic(I);
       break;
 
+    case Intrinsic::x86_pclmulqdq:
+    case Intrinsic::x86_pclmulqdq_256:
+    case Intrinsic::x86_pclmulqdq_512:
+      handlePclmulIntrinsic(I);
+      break;
+
     case Intrinsic::is_constant:
       // The result of llvm.is.constant() is always defined.
       setShadow(&I, getCleanShadow(&I));
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c3ca43fcd492..e5edd305d3d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -279,9 +279,10 @@ private:
 
   /// Build a VPlan using VPRecipes according to the information gather by
   /// Legal. This method is only used for the legacy inner loop vectorizer.
-  VPlanPtr
-  buildVPlanWithVPRecipes(VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
-                          SmallPtrSetImpl<Instruction *> &DeadInstructions);
+  VPlanPtr buildVPlanWithVPRecipes(
+      VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
+      SmallPtrSetImpl<Instruction *> &DeadInstructions,
+      const DenseMap<Instruction *, Instruction *> &SinkAfter);
 
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 684a3098e564..ebfd5fe8b762 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6716,7 +6716,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
   BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
   assert(BI && "Unexpected terminator found");
 
-  if (!BI->isConditional())
+  if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1))
     return EdgeMaskCache[Edge] = SrcMask;
 
   VPValue *EdgeMask = Plan->getVPValue(BI->getCondition());
@@ -7118,25 +7118,29 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
   SmallPtrSet<Instruction *, 4> DeadInstructions;
   collectTriviallyDeadInstructions(DeadInstructions);
 
+  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
+  // Dead instructions do not need sinking. Remove them from SinkAfter.
+  for (Instruction *I : DeadInstructions)
+    SinkAfter.erase(I);
+
   for (unsigned VF = MinVF; VF < MaxVF + 1;) {
     VFRange SubRange = {VF, MaxVF + 1};
-    VPlans.push_back(
-        buildVPlanWithVPRecipes(SubRange, NeedDef, DeadInstructions));
+    VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef,
+                                             DeadInstructions, SinkAfter));
     VF = SubRange.End;
   }
 }
 
 VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
-    SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+    SmallPtrSetImpl<Instruction *> &DeadInstructions,
+    const DenseMap<Instruction *, Instruction *> &SinkAfter) {
 
   // Hold a mapping from predicated instructions to their recipes, in order to
   // fix their AlsoPack behavior if a user is determined to replicate and use a
   // scalar instead of vector value.
   DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
 
-  DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
-
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
 
   VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index bfe7e8f04303..0efd0df2c12b 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -197,6 +197,11 @@ namespace {
     cl::desc("Generate software floating point library calls"),
     cl::init(false));
 
+  cl::opt<bool> NoProcessSymbols(
+      "no-process-syms",
+      cl::desc("Do not resolve lli process symbols in JIT'd code"),
+      cl::init(false));
+
   enum class DumpKind {
     NoDump,
     DumpFuncsToStdOut,
@@ -794,12 +799,16 @@ int runOrcLazyJIT(const char *ProgName) {
   });
 
   orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
-  J->getMainJITDylib().addGenerator(
-      ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
-          J->getDataLayout().getGlobalPrefix(),
-          [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
-            return Name != MainName;
-          })));
+
+  // Unless they've been explicitly disabled, make process symbols available to
+  // JIT'd code.
+  if (!NoProcessSymbols)
+    J->getMainJITDylib().addGenerator(
+        ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            J->getDataLayout().getGlobalPrefix(),
+            [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
+              return Name != MainName;
+            })));
 
   orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
   ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
author	Dimitry Andric <dim@FreeBSD.org>	2020-02-14 21:24:03 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2020-02-14 21:24:03 +0000
commit	d75c7debad4509ece98792074e64b8a650a27bdb (patch)
tree	f8d77975739b43bf7ffef0612579168cb9ec9474
parent	9c2f6c4bb805c7ac08c8925c96e429fcc322725e (diff)