Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm-c/Core.h | 1
-rw-r--r--  llvm/include/llvm-c/DebugInfo.h | 2
-rw-r--r--  llvm/include/llvm/ADT/APFixedPoint.h | 4
-rw-r--r--  llvm/include/llvm/ADT/APFloat.h | 1
-rw-r--r--  llvm/include/llvm/ADT/APSInt.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Any.h | 11
-rw-r--r--  llvm/include/llvm/ADT/BitVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/BreadthFirstIterator.h | 15
-rw-r--r--  llvm/include/llvm/ADT/CachedHashString.h | 19
-rw-r--r--  llvm/include/llvm/ADT/CoalescingBitVector.h | 3
-rw-r--r--  llvm/include/llvm/ADT/DenseMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DenseMapInfo.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DenseSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/DepthFirstIterator.h | 46
-rw-r--r--  llvm/include/llvm/ADT/DirectedGraph.h | 9
-rw-r--r--  llvm/include/llvm/ADT/EnumeratedArray.h | 8
-rw-r--r--  llvm/include/llvm/ADT/EpochTracker.h | 11
-rw-r--r--  llvm/include/llvm/ADT/EquivalenceClasses.h | 9
-rw-r--r--  llvm/include/llvm/ADT/FloatingPointMode.h | 7
-rw-r--r--  llvm/include/llvm/ADT/FoldingSet.h | 11
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleImpl.h | 27
-rw-r--r--  llvm/include/llvm/ADT/GenericCycleInfo.h | 4
-rw-r--r--  llvm/include/llvm/ADT/GraphTraits.h | 16
-rw-r--r--  llvm/include/llvm/ADT/ImmutableList.h | 7
-rw-r--r--  llvm/include/llvm/ADT/ImmutableMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/ImmutableSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/IndexedMap.h | 19
-rw-r--r--  llvm/include/llvm/ADT/IntEqClasses.h | 21
-rw-r--r--  llvm/include/llvm/ADT/IntervalMap.h | 49
-rw-r--r--  llvm/include/llvm/ADT/IntrusiveRefCntPtr.h | 95
-rw-r--r--  llvm/include/llvm/ADT/MapVector.h | 13
-rw-r--r--  llvm/include/llvm/ADT/None.h | 11
-rw-r--r--  llvm/include/llvm/ADT/Optional.h | 13
-rw-r--r--  llvm/include/llvm/ADT/PackedVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/PointerIntPair.h | 7
-rw-r--r--  llvm/include/llvm/ADT/PointerUnion.h | 9
-rw-r--r--  llvm/include/llvm/ADT/PostOrderIterator.h | 11
-rw-r--r--  llvm/include/llvm/ADT/PriorityQueue.h | 7
-rw-r--r--  llvm/include/llvm/ADT/STLArrayExtras.h | 35
-rw-r--r--  llvm/include/llvm/ADT/STLExtras.h | 28
-rw-r--r--  llvm/include/llvm/ADT/STLForwardCompat.h | 13
-rw-r--r--  llvm/include/llvm/ADT/ScopeExit.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SetOperations.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SetVector.h | 19
-rw-r--r--  llvm/include/llvm/ADT/SmallBitVector.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallPtrSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallString.h | 7
-rw-r--r--  llvm/include/llvm/ADT/SmallVector.h | 39
-rw-r--r--  llvm/include/llvm/ADT/SparseBitVector.h | 9
-rw-r--r--  llvm/include/llvm/ADT/SparseMultiSet.h | 21
-rw-r--r--  llvm/include/llvm/ADT/SparseSet.h | 19
-rw-r--r--  llvm/include/llvm/ADT/Statistic.h | 31
-rw-r--r--  llvm/include/llvm/ADT/StringExtras.h | 12
-rw-r--r--  llvm/include/llvm/ADT/StringMap.h | 7
-rw-r--r--  llvm/include/llvm/ADT/StringMapEntry.h | 11
-rw-r--r--  llvm/include/llvm/ADT/StringSet.h | 7
-rw-r--r--  llvm/include/llvm/ADT/StringSwitch.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Triple.h | 35
-rw-r--r--  llvm/include/llvm/ADT/TypeSwitch.h | 9
-rw-r--r--  llvm/include/llvm/ADT/Waymarking.h | 322
-rw-r--r--  llvm/include/llvm/ADT/bit.h | 7
-rw-r--r--  llvm/include/llvm/ADT/edit_distance.h | 11
-rw-r--r--  llvm/include/llvm/ADT/ilist.h | 27
-rw-r--r--  llvm/include/llvm/ADT/ilist_node.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/CycleAnalysis.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/DDG.h | 24
-rw-r--r--  llvm/include/llvm/Analysis/DependenceAnalysis.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/DependenceGraphBuilder.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/IRSimilarityIdentifier.h | 45
-rw-r--r--  llvm/include/llvm/Analysis/IndirectCallVisitor.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/InlineOrder.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LazyCallGraph.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/LazyValueInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/Loads.h | 3
-rw-r--r--  llvm/include/llvm/Analysis/LoopInfo.h | 10
-rw-r--r--  llvm/include/llvm/Analysis/MLInlineAdvisor.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/MustExecute.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/ObjCARCUtil.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/SparsePropagation.h | 1
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.h | 11
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfo.h | 2
-rw-r--r--  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 3
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 3
-rw-r--r--  llvm/include/llvm/BinaryFormat/MsgPackDocument.h | 4
-rw-r--r--  llvm/include/llvm/BinaryFormat/Swift.def | 26
-rw-r--r--  llvm/include/llvm/BinaryFormat/Swift.h | 24
-rw-r--r--  llvm/include/llvm/Bitcode/BitcodeWriter.h | 6
-rw-r--r--  llvm/include/llvm/Bitstream/BitstreamReader.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/DIE.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/FastISel.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 37
-rw-r--r--  llvm/include/llvm/CodeGen/IntrinsicLowering.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/LoopTraversal.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MIRFormatter.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MIRYamlMapping.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFrameInfo.h | 11
-rw-r--r--  llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOperand.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineOutliner.h | 4
-rw-r--r--  llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/ReplaceWithVeclib.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGISel.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/SlotIndexes.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/SwitchLoweringUtils.h | 8
-rw-r--r--  llvm/include/llvm/CodeGen/TargetCallingConv.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/VirtRegMap.h | 11
-rw-r--r--  llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 8
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/GSYM/StringTable.h | 2
-rw-r--r--  llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h | 4
-rw-r--r--  llvm/include/llvm/Demangle/ItaniumDemangle.h | 12
-rw-r--r--  llvm/include/llvm/Demangle/README.txt | 71
-rw-r--r--  llvm/include/llvm/Demangle/StringView.h | 9
-rw-r--r--  llvm/include/llvm/Demangle/Utility.h | 11
-rw-r--r--  llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Core.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h | 2
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h | 2
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 3
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 62
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 7
-rw-r--r--  llvm/include/llvm/IR/AbstractCallSite.h | 4
-rw-r--r--  llvm/include/llvm/IR/Attributes.h | 1
-rw-r--r--  llvm/include/llvm/IR/CFG.h | 2
-rw-r--r--  llvm/include/llvm/IR/DIBuilder.h | 1
-rw-r--r--  llvm/include/llvm/IR/DebugInfoMetadata.h | 1
-rw-r--r--  llvm/include/llvm/IR/DiagnosticInfo.h | 16
-rw-r--r--  llvm/include/llvm/IR/Dominators.h | 13
-rw-r--r--  llvm/include/llvm/IR/IRBuilder.h | 5
-rw-r--r--  llvm/include/llvm/IR/IRPrintingPasses.h | 5
-rw-r--r--  llvm/include/llvm/IR/InstrTypes.h | 9
-rw-r--r--  llvm/include/llvm/IR/Instruction.h | 2
-rw-r--r--  llvm/include/llvm/IR/Instructions.h | 3
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h | 11
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td | 4
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 8
-rw-r--r--  llvm/include/llvm/IR/LLVMContext.h | 1
-rw-r--r--  llvm/include/llvm/IR/LLVMRemarkStreamer.h | 12
-rw-r--r--  llvm/include/llvm/IR/LegacyPassManager.h | 2
-rw-r--r--  llvm/include/llvm/IR/MDBuilder.h | 2
-rw-r--r--  llvm/include/llvm/IR/Metadata.h | 23
-rw-r--r--  llvm/include/llvm/IR/ModuleSummaryIndex.h | 1
-rw-r--r--  llvm/include/llvm/IR/PassInstrumentation.h | 2
-rw-r--r--  llvm/include/llvm/IR/PassManager.h | 5
-rw-r--r--  llvm/include/llvm/IR/PassManagerImpl.h | 2
-rw-r--r--  llvm/include/llvm/IR/PassTimingInfo.h | 2
-rw-r--r--  llvm/include/llvm/IR/ReplaceConstant.h | 7
-rw-r--r--  llvm/include/llvm/IR/SSAContext.h | 5
-rw-r--r--  llvm/include/llvm/IR/SafepointIRVerifier.h | 2
-rw-r--r--  llvm/include/llvm/IR/Statepoint.h | 20
-rw-r--r--  llvm/include/llvm/IR/Type.h | 2
-rw-r--r--  llvm/include/llvm/IR/Use.h | 1
-rw-r--r--  llvm/include/llvm/InterfaceStub/IFSStub.h | 4
-rw-r--r--  llvm/include/llvm/LineEditor/LineEditor.h | 2
-rw-r--r--  llvm/include/llvm/MC/MCContext.h | 10
-rw-r--r--  llvm/include/llvm/MC/MCObjectFileInfo.h | 14
-rw-r--r--  llvm/include/llvm/MC/MCPseudoProbe.h | 2
-rw-r--r--  llvm/include/llvm/MCA/CustomBehaviour.h | 2
-rw-r--r--  llvm/include/llvm/MCA/HWEventListener.h | 2
-rw-r--r--  llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h | 2
-rw-r--r--  llvm/include/llvm/Object/Archive.h | 2
-rw-r--r--  llvm/include/llvm/Object/ELFTypes.h | 2
-rw-r--r--  llvm/include/llvm/Object/MachO.h | 4
-rw-r--r--  llvm/include/llvm/Object/ObjectFile.h | 6
-rw-r--r--  llvm/include/llvm/Passes/StandardInstrumentations.h | 2
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 19
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfCorrelator.h | 2
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfData.inc | 4
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfReader.h | 79
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfWriter.h | 51
-rw-r--r--  llvm/include/llvm/ProfileData/MemProfData.inc | 99
-rw-r--r--  llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h | 3
-rw-r--r--  llvm/include/llvm/Remarks/RemarkLinker.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkParser.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkSerializer.h | 6
-rw-r--r--  llvm/include/llvm/Remarks/RemarkStreamer.h | 4
-rw-r--r--  llvm/include/llvm/Support/AArch64TargetParser.def | 3
-rw-r--r--  llvm/include/llvm/Support/AMDGPUMetadata.h | 5
-rw-r--r--  llvm/include/llvm/Support/ARMTargetParser.def | 2
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamReader.h | 11
-rw-r--r--  llvm/include/llvm/Support/BinaryStreamWriter.h | 11
-rw-r--r--  llvm/include/llvm/Support/CommandLine.h | 2
-rw-r--r--  llvm/include/llvm/Support/Compiler.h | 21
-rw-r--r--  llvm/include/llvm/Support/FileOutputBuffer.h | 2
-rw-r--r--  llvm/include/llvm/Support/FormatVariadicDetails.h | 2
-rw-r--r--  llvm/include/llvm/Support/GenericDomTree.h | 2
-rw-r--r--  llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h | 2
-rw-r--r--  llvm/include/llvm/Support/KnownBits.h | 2
-rw-r--r--  llvm/include/llvm/Support/RISCVISAInfo.h | 3
-rw-r--r--  llvm/include/llvm/Support/ScopedPrinter.h | 8
-rw-r--r--  llvm/include/llvm/Support/SuffixTree.h | 2
-rw-r--r--  llvm/include/llvm/Support/Timer.h | 2
-rw-r--r--  llvm/include/llvm/TableGen/Record.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/AlwaysInliner.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h | 8
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h | 168
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/IROutliner.h | 12
-rw-r--r--  llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfile.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/InstCombine/InstCombiner.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h | 15
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LoopPassManager.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/Scalarizer.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h | 3
-rw-r--r--  llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Debugify.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/LoopPeel.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Utils/ModuleUtils.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h | 1
-rw-r--r--  llvm/include/llvm/module.modulemap | 1
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 11
-rw-r--r--  llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 31
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 6
-rw-r--r--  llvm/lib/Analysis/Loads.cpp | 4
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 4
-rw-r--r--  llvm/lib/Analysis/MemDerefPrinter.cpp | 8
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 41
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 33
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 4
-rw-r--r--  llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 13
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/Analysis.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 3
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 10
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 25
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/EarlyIfConversion.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/ExpandMemCmp.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/IfConversion.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 351
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 41
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 135
-rw-r--r--  llvm/lib/CodeGen/MachineModuleInfo.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/MachineModuleSlotTracker.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineRegisterInfo.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/PostRASchedulerList.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 181
-rw-r--r--  llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 6
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 183
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 76
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 30
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 57
-rw-r--r--  llvm/lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r--  llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20
-rw-r--r--  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 110
-rw-r--r--  llvm/lib/IR/Attributes.cpp | 50
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 1
-rw-r--r--  llvm/lib/IR/BasicBlock.cpp | 3
-rw-r--r--  llvm/lib/IR/Comdat.cpp | 4
-rw-r--r--  llvm/lib/IR/ConstantFold.cpp | 2
-rw-r--r--  llvm/lib/IR/Constants.cpp | 7
-rw-r--r--  llvm/lib/IR/Core.cpp | 2
-rw-r--r--  llvm/lib/IR/DIBuilder.cpp | 2
-rw-r--r--  llvm/lib/IR/DataLayout.cpp | 3
-rw-r--r--  llvm/lib/IR/DebugInfo.cpp | 5
-rw-r--r--  llvm/lib/IR/DebugInfoMetadata.cpp | 4
-rw-r--r--  llvm/lib/IR/DebugLoc.cpp | 1
-rw-r--r--  llvm/lib/IR/DiagnosticInfo.cpp | 6
-rw-r--r--  llvm/lib/IR/Dominators.cpp | 18
-rw-r--r--  llvm/lib/IR/Function.cpp | 2
-rw-r--r--  llvm/lib/IR/Globals.cpp | 2
-rw-r--r--  llvm/lib/IR/IRBuilder.cpp | 1
-rw-r--r--  llvm/lib/IR/InlineAsm.cpp | 1
-rw-r--r--  llvm/lib/IR/Instruction.cpp | 1
-rw-r--r--  llvm/lib/IR/IntrinsicInst.cpp | 2
-rw-r--r--  llvm/lib/IR/LLVMContext.cpp | 2
-rw-r--r--  llvm/lib/IR/LLVMContextImpl.cpp | 12
-rw-r--r--  llvm/lib/IR/LLVMContextImpl.h | 24
-rw-r--r--  llvm/lib/IR/LLVMRemarkStreamer.cpp | 2
-rw-r--r--  llvm/lib/IR/LegacyPassManager.cpp | 11
-rw-r--r--  llvm/lib/IR/Metadata.cpp | 4
-rw-r--r--  llvm/lib/IR/Module.cpp | 2
-rw-r--r--  llvm/lib/IR/ModuleSummaryIndex.cpp | 1
-rw-r--r--  llvm/lib/IR/Operator.cpp | 1
-rw-r--r--  llvm/lib/IR/OptBisect.cpp | 1
-rw-r--r--  llvm/lib/IR/PassManager.cpp | 5
-rw-r--r--  llvm/lib/IR/ProfileSummary.cpp | 3
-rw-r--r--  llvm/lib/IR/PseudoProbe.cpp | 2
-rw-r--r--  llvm/lib/IR/ReplaceConstant.cpp | 3
-rw-r--r--  llvm/lib/IR/SSAContext.cpp | 3
-rw-r--r--  llvm/lib/IR/SafepointIRVerifier.cpp | 4
-rw-r--r--  llvm/lib/IR/Statepoint.cpp | 2
-rw-r--r--  llvm/lib/IR/Type.cpp | 2
-rw-r--r--  llvm/lib/IR/Use.cpp | 6
-rw-r--r--  llvm/lib/IR/Value.cpp | 3
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 12
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 3
-rw-r--r--  llvm/lib/LTO/LTOBackend.cpp | 7
-rw-r--r--  llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 2
-rw-r--r--  llvm/lib/MC/MCAsmStreamer.cpp | 2
-rw-r--r--  llvm/lib/MC/MCContext.cpp | 8
-rw-r--r--  llvm/lib/MC/MCObjectFileInfo.cpp | 12
-rw-r--r--  llvm/lib/Object/MachOObjectFile.cpp | 13
-rw-r--r--  llvm/lib/ObjectYAML/ELFYAML.cpp | 1
-rw-r--r--  llvm/lib/ObjectYAML/WasmEmitter.cpp | 13
-rw-r--r--  llvm/lib/Passes/PassBuilderPipelines.cpp | 7
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp | 26
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 62
-rw-r--r--  llvm/lib/ProfileData/InstrProfWriter.cpp | 31
-rw-r--r--  llvm/lib/Remarks/BitstreamRemarkParser.cpp | 1
-rw-r--r--  llvm/lib/Remarks/BitstreamRemarkParser.h | 4
-rw-r--r--  llvm/lib/Remarks/RemarkLinker.cpp | 4
-rw-r--r--  llvm/lib/Remarks/RemarkParser.cpp | 1
-rw-r--r--  llvm/lib/Remarks/YAMLRemarkParser.h | 2
-rw-r--r--  llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 1
-rw-r--r--  llvm/lib/Support/ARMAttributeParser.cpp | 2
-rw-r--r--  llvm/lib/Support/Host.cpp | 1
-rw-r--r--  llvm/lib/Support/RISCVISAInfo.cpp | 61
-rw-r--r--  llvm/lib/Support/Signals.cpp | 2
-rw-r--r--  llvm/lib/Support/Triple.cpp | 4
-rw-r--r--  llvm/lib/Support/Valgrind.cpp | 2
-rw-r--r--  llvm/lib/Support/Windows/Host.inc | 3
-rw-r--r--  llvm/lib/Support/raw_ostream.cpp | 2
-rw-r--r--  llvm/lib/TableGen/DetailedRecordsBackend.cpp | 11
-rw-r--r--  llvm/lib/TableGen/JSONBackend.cpp | 8
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 2
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 8
-rw-r--r--  llvm/lib/TableGen/SetTheory.cpp | 3
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp | 3
-rw-r--r--  llvm/lib/TableGen/TableGenBackendSkeleton.cpp | 19
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 46
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 27
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 145
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 31
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 84
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 97
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h | 19
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 21
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 95
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 55
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 1
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 35
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 88
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 41
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5
-rw-r--r--  llvm/lib/Target/ARM/ARM.td | 10
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 39
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td | 19
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb.td | 9
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb2.td | 16
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.cpp | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 8
-rw-r--r--  llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp | 35
-rw-r--r--  llvm/lib/Target/M68k/M68kInstrBits.td | 8
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 45
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 45
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 11
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.td | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 175
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 21
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 98
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td | 16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 20
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 503
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 28
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZk.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 27
-rw-r--r--  llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp | 4
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 82
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.h | 5
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 7
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp | 38
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h | 43
-rw-r--r--  llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h | 38
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp | 16
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 1
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp | 41
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp | 34
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp | 18
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 230
-rw-r--r--  llvm/lib/Target/X86/X86LowerAMXType.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 59
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp | 244
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 560
-rw-r--r--  llvm/lib/Transforms/IPO/CalledValuePropagation.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp | 27
-rw-r--r--  llvm/lib/Transforms/IPO/IROutliner.cpp | 7
-rw-r--r--  llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 233
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 1
-rw-r--r--  llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 52
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 285
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 15
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 26
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 123
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 48
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 82
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARC.cpp | 13
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARC.h | 6
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp | 25
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFuse.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 28
-rw-r--r--  llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 147
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp | 53
-rw-r--r--  llvm/lib/Transforms/Utils/CodeExtractor.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/GlobalStatus.cpp | 25
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/LoopPeel.cpp | 60
-rw-r--r--  llvm/lib/Transforms/Utils/ModuleUtils.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Utils/Utils.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Utils/VNCoercion.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 161
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 198
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h | 89
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 17
-rw-r--r--  llvm/lib/Transforms/Vectorize/Vectorize.cpp | 1
-rw-r--r--  llvm/tools/llvm-ar/llvm-ar.cpp | 16
-rw-r--r--  llvm/tools/llvm-as/llvm-as.cpp | 2
-rw-r--r--  llvm/tools/llvm-extract/llvm-extract.cpp | 1
-rw-r--r--  llvm/tools/llvm-lto/llvm-lto.cpp | 6
-rw-r--r--  llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp | 1
-rw-r--r--  llvm/tools/llvm-profdata/llvm-profdata.cpp | 22
-rw-r--r--  llvm/tools/llvm-readobj/ELFDumper.cpp | 1
-rw-r--r--  llvm/tools/llvm-readobj/WasmDumper.cpp | 5
-rw-r--r--  llvm/tools/llvm-stress/llvm-stress.cpp | 3
-rw-r--r--  llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 40
-rw-r--r--  llvm/utils/TableGen/CodeGenSchedule.cpp | 13
-rw-r--r--  llvm/utils/TableGen/CompressInstEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GICombinerEmitter.cpp | 12
-rw-r--r--  llvm/utils/TableGen/GlobalISel/CodeExpander.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/CodeExpander.h | 2
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDag.h | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h | 5
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp | 2
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h | 6
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h | 6
-rw-r--r--  llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp | 1
-rw-r--r--  llvm/utils/TableGen/GlobalISelEmitter.cpp | 2
-rw-r--r--  llvm/utils/TableGen/InfoByHwMode.cpp | 1
-rw-r--r--  llvm/utils/TableGen/InfoByHwMode.h | 2
-rw-r--r--  llvm/utils/TableGen/IntrinsicEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/OptParserEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/OptRSTEmitter.cpp | 7
-rw-r--r--  llvm/utils/TableGen/PredicateExpander.h | 4
-rw-r--r--  llvm/utils/TableGen/RegisterBankEmitter.cpp | 1
-rw-r--r--  llvm/utils/TableGen/SearchableTableEmitter.cpp | 5
-rw-r--r--  llvm/utils/TableGen/TableGen.cpp | 3
-rw-r--r--  llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp | 3
-rw-r--r--  llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h | 5
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerTables.cpp | 5
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerTables.h | 7
-rw-r--r--  llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 2
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.cpp | 2
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.h | 11
543 files changed, 6708 insertions, 3800 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index ca3ca24487a5..09d80841fa5d 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -18,6 +18,7 @@
#include "llvm-c/Deprecated.h"
#include "llvm-c/ErrorHandling.h"
#include "llvm-c/ExternC.h"
+
#include "llvm-c/Types.h"
LLVM_C_EXTERN_C_BEGIN
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index a515533f38e2..8554a0199873 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -16,8 +16,8 @@
#ifndef LLVM_C_DEBUGINFO_H
#define LLVM_C_DEBUGINFO_H
-#include "llvm-c/Core.h"
#include "llvm-c/ExternC.h"
+#include "llvm-c/Types.h"
LLVM_C_EXTERN_C_BEGIN
diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h
index d6349e6b2a88..92cabdd9f9e4 100644
--- a/llvm/include/llvm/ADT/APFixedPoint.h
+++ b/llvm/include/llvm/ADT/APFixedPoint.h
@@ -5,12 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+///
/// \file
/// Defines the fixed point number interface.
/// This is a class for abstracting various operations performed on fixed point
/// types.
-//
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_APFIXEDPOINT_H
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 40e0e32c77a8..17b57de7b0aa 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// \brief
/// This file declares a class to represent arbitrary precision floating point
/// values and provide a variety of arithmetic operations on them.
///
diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h
index c1cf3c546070..7b6af436f577 100644
--- a/llvm/include/llvm/ADT/APSInt.h
+++ b/llvm/include/llvm/ADT/APSInt.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the APSInt class, which is a simple class that
-// represents an arbitrary sized integer that knows its signedness.
-//
+///
+/// \file
+/// This file implements the APSInt class, which is a simple class that
+/// represents an arbitrary sized integer that knows its signedness.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_APSINT_H
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index 1b4f2c2fa985..1c7ba0371781 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides Any, a non-template class modeled in the spirit of
-// std::any. The idea is to provide a type-safe replacement for C's void*.
-// It can hold a value of any copy-constructible copy-assignable type
-//
+///
+/// \file
+/// This file provides Any, a non-template class modeled in the spirit of
+/// std::any. The idea is to provide a type-safe replacement for C's void*.
+/// It can hold a value of any copy-constructible copy-assignable type
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ANY_H
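As context for the header comment rewritten above, a minimal usage sketch of llvm::Any; the function name `anyDemo` is illustrative and not part of this patch:

```
#include "llvm/ADT/Any.h"
#include <cassert>
#include <string>

void anyDemo() {
  llvm::Any A = 7; // holds an int by value
  assert(llvm::any_isa<int>(A)); // type-safe query, unlike a raw void*
  int N = llvm::any_cast<int>(A);
  (void)N;
  A = std::string("hello"); // rebinds to another copy-constructible type
}
```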
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index fff4a8f578d2..9540b3985963 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the BitVector class.
-//
+///
+/// \file
+/// This file implements the BitVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BITVECTOR_H
diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h
index 7d728a23b19a..1312b5f91e83 100644
--- a/llvm/include/llvm/ADT/BreadthFirstIterator.h
+++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h
@@ -5,13 +5,14 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build a generic breadth
-// first graph iterator. This file exposes the following functions/types:
-//
-// bf_begin/bf_end/bf_iterator
-// * Normal breadth-first iteration - visit a graph level-by-level.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to build a generic breadth
+/// first graph iterator. This file exposes the following functions/types:
+///
+/// bf_begin/bf_end/bf_iterator
+/// * Normal breadth-first iteration - visit a graph level-by-level.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BREADTHFIRSTITERATOR_H
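To make the bf_begin/bf_end interface described above concrete, a small sketch over the IR CFG (the helper name `visitLevelOrder` is illustrative):

```
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/IR/CFG.h" // GraphTraits specializations for the IR CFG
#include "llvm/IR/Function.h"

void visitLevelOrder(llvm::Function &F) {
  // breadth_first() wraps bf_begin/bf_end; basic blocks are visited
  // level-by-level starting from the entry block.
  for (llvm::BasicBlock *BB : llvm::breadth_first(&F))
    (void)BB; // process BB
}
```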
diff --git a/llvm/include/llvm/ADT/CachedHashString.h b/llvm/include/llvm/ADT/CachedHashString.h
index 785bd07b3a44..ebd40e320715 100644
--- a/llvm/include/llvm/ADT/CachedHashString.h
+++ b/llvm/include/llvm/ADT/CachedHashString.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines CachedHashString and CachedHashStringRef. These are owning
-// and not-owning string types that store their hash in addition to their string
-// data.
-//
-// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
-// (because, unlike std::string, CachedHashString lets us have empty and
-// tombstone values).
-//
+///
+/// \file
+/// This file defines CachedHashString and CachedHashStringRef. These are
+/// owning and not-owning string types that store their hash in addition to
+/// their string data.
+///
+/// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
+/// (because, unlike std::string, CachedHashString lets us have empty and
+/// tombstone values).
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_CACHEDHASHSTRING_H
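A minimal sketch of the DenseSet use case the comment above describes; the `insertOnce` helper is illustrative, not part of the patch:

```
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"

bool insertOnce(llvm::DenseSet<llvm::CachedHashStringRef> &Seen,
                llvm::StringRef S) {
  // The hash is computed once here and reused on every later probe;
  // DenseSet works because the type reserves empty/tombstone encodings.
  return Seen.insert(llvm::CachedHashStringRef(S)).second;
}
```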
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 6935c255a099..4940bc1c2c18 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file A bitvector that uses an IntervalMap to coalesce adjacent elements
+/// \file
+/// A bitvector that uses an IntervalMap to coalesce adjacent elements
/// into intervals.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 595eabd0ffb4..7673b66ca42a 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DenseMap class.
-//
+///
+/// \file
+/// This file defines the DenseMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSEMAP_H
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index 75b7371a3683..afd478f0b849 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines DenseMapInfo traits for DenseMap.
-//
+///
+/// \file
+/// This file defines DenseMapInfo traits for DenseMap.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSEMAPINFO_H
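For readers unfamiliar with these traits, a sketch of a user-side DenseMapInfo specialization; the `Point` type and the choice of reserved keys are illustrative assumptions, not part of this patch:

```
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include <climits>

struct Point { int X, Y; };

namespace llvm {
template <> struct DenseMapInfo<Point> {
  // Two reserved keys that must never collide with real data.
  static Point getEmptyKey() { return {INT_MIN, 0}; }
  static Point getTombstoneKey() { return {INT_MIN, 1}; }
  static unsigned getHashValue(const Point &P) {
    return static_cast<unsigned>(hash_combine(P.X, P.Y));
  }
  static bool isEqual(const Point &L, const Point &R) {
    return L.X == R.X && L.Y == R.Y;
  }
};
} // namespace llvm

// Usage: llvm::DenseMap<Point, int> M; M[{3, 4}] = 5;
```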
diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h
index e767211a0900..b89c88626e43 100644
--- a/llvm/include/llvm/ADT/DenseSet.h
+++ b/llvm/include/llvm/ADT/DenseSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DenseSet and SmallDenseSet classes.
-//
+///
+/// \file
+/// This file defines the DenseSet and SmallDenseSet classes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DENSESET_H
diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index 42ac61d7cf52..cea6fbcd9d29 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -5,28 +5,30 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build generic depth
-// first graph iterator. This file exposes the following functions/types:
-//
-// df_begin/df_end/df_iterator
-// * Normal depth-first iteration - visit a node and then all of its children.
-//
-// idf_begin/idf_end/idf_iterator
-// * Depth-first iteration on the 'inverse' graph.
-//
-// df_ext_begin/df_ext_end/df_ext_iterator
-// * Normal depth-first iteration - visit a node and then all of its children.
-// This iterator stores the 'visited' set in an external set, which allows
-// it to be more efficient, and allows external clients to use the set for
-// other purposes.
-//
-// idf_ext_begin/idf_ext_end/idf_ext_iterator
-// * Depth-first iteration on the 'inverse' graph.
-// This iterator stores the 'visited' set in an external set, which allows
-// it to be more efficient, and allows external clients to use the set for
-// other purposes.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to build generic depth
+/// first graph iterator. This file exposes the following functions/types:
+///
+/// df_begin/df_end/df_iterator
+/// * Normal depth-first iteration - visit a node and then all of its
+/// children.
+///
+/// idf_begin/idf_end/idf_iterator
+/// * Depth-first iteration on the 'inverse' graph.
+///
+/// df_ext_begin/df_ext_end/df_ext_iterator
+/// * Normal depth-first iteration - visit a node and then all of its
+/// children. This iterator stores the 'visited' set in an external set,
+/// which allows it to be more efficient, and allows external clients to
+/// use the set for other purposes.
+///
+/// idf_ext_begin/idf_ext_end/idf_ext_iterator
+/// * Depth-first iteration on the 'inverse' graph.
+/// This iterator stores the 'visited' set in an external set, which
+/// allows it to be more efficient, and allows external clients to use
+/// the set for other purposes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DEPTHFIRSTITERATOR_H
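A short sketch of the iterators enumerated above, again over the IR CFG (the function name `visitPreorder` is illustrative):

```
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

void visitPreorder(llvm::Function &F) {
  // depth_first() wraps df_begin/df_end; inverse_depth_first() wraps idf_*.
  for (llvm::BasicBlock *BB : llvm::depth_first(&F))
    (void)BB; // BB is visited before the subtrees of its successors

  // The _ext variants take the visited set by reference, so it can be
  // inspected afterwards or shared across several traversals.
  llvm::df_iterator_default_set<llvm::BasicBlock *> Visited;
  for (llvm::BasicBlock *BB : llvm::depth_first_ext(&F, Visited))
    (void)BB;
}
```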
diff --git a/llvm/include/llvm/ADT/DirectedGraph.h b/llvm/include/llvm/ADT/DirectedGraph.h
index e8bb9e6b2292..83c0bea6393c 100644
--- a/llvm/include/llvm/ADT/DirectedGraph.h
+++ b/llvm/include/llvm/ADT/DirectedGraph.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the interface and a base class implementation for a
-// directed graph.
-//
+///
+/// \file
+/// This file defines the interface and a base class implementation for a
+/// directed graph.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DIRECTEDGRAPH_H
diff --git a/llvm/include/llvm/ADT/EnumeratedArray.h b/llvm/include/llvm/ADT/EnumeratedArray.h
index a66ec9d08c37..f54a50446c6e 100644
--- a/llvm/include/llvm/ADT/EnumeratedArray.h
+++ b/llvm/include/llvm/ADT/EnumeratedArray.h
@@ -5,9 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines an array type that can be indexed using scoped enum values.
-//
+///
+/// \file
+/// This file defines an array type that can be indexed using scoped enum
+/// values.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ENUMERATEDARRAY_H
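A minimal sketch of indexing by a scoped enum, assuming an enumeration with a `Last` member as the header's default template argument expects; `Channel` and `enumArrayDemo` are illustrative names:

```
#include "llvm/ADT/EnumeratedArray.h"

enum class Channel { R, G, B, Last = B };

void enumArrayDemo() {
  // Size is derived from Channel::Last; indexing takes the enum directly,
  // avoiding static_casts at every call site.
  llvm::EnumeratedArray<float, Channel> Gain;
  Gain[Channel::G] = 2.0f;
}
```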
diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h
index 7a2e4220afec..b06888494466 100644
--- a/llvm/include/llvm/ADT/EpochTracker.h
+++ b/llvm/include/llvm/ADT/EpochTracker.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes.
-// These can be used to write iterators that are fail-fast when LLVM is built
-// with asserts enabled.
-//
+///
+/// \file
+/// This file defines the DebugEpochBase and DebugEpochBase::HandleBase classes.
+/// These can be used to write iterators that are fail-fast when LLVM is built
+/// with asserts enabled.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EPOCHTRACKER_H
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index de6bb3bca7e3..f12b683ead2d 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Generic implementation of equivalence classes through the use Tarjan's
-// efficient union-find algorithm.
-//
+///
+/// \file
+/// Generic implementation of equivalence classes through the use Tarjan's
+/// efficient union-find algorithm.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EQUIVALENCECLASSES_H
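A minimal sketch of the union-find workflow the comment above describes (`ecDemo` is an illustrative name):

```
#include "llvm/ADT/EquivalenceClasses.h"

void ecDemo() {
  llvm::EquivalenceClasses<int> EC;
  EC.unionSets(1, 2); // inserts 1 and 2 if needed, then merges their classes
  EC.unionSets(2, 3); // {1, 2, 3} now share a single leader
  bool Same = EC.isEquivalent(1, 3); // true
  (void)Same;
}
```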
diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h
index 62c127a49620..9cc69b8a8344 100644
--- a/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Utilities for dealing with flags related to floating point mode controls.
-//
+///
+/// \file
+/// Utilities for dealing with flags related to floating point mode controls.
+///
//===----------------------------------------------------------------------===/
#ifndef LLVM_ADT_FLOATINGPOINTMODE_H
diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h
index fb1cb03a4b5c..a8707f0ee81e 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a hash set that can be used to remove duplication of nodes
-// in a graph. This code was originally created by Chris Lattner for use with
-// SelectionDAGCSEMap, but was isolated to provide use across the llvm code set.
-//
+///
+/// \file
+/// This file defines a hash set that can be used to remove duplication of nodes
+/// in a graph. This code was originally created by Chris Lattner for use with
+/// SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+/// set.
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_FOLDINGSET_H
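To make the uniquing idea above concrete, a sketch of a foldable node; the `ConstNode` type and `foldDemo` are illustrative, not from the patch:

```
#include "llvm/ADT/FoldingSet.h"

// A node type becomes uniquable by inheriting from FoldingSetNode and
// describing its identity in Profile().
class ConstNode : public llvm::FoldingSetNode {
  int Value;
public:
  explicit ConstNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

void foldDemo() {
  llvm::FoldingSet<ConstNode> Set;
  ConstNode A(42), B(42);
  Set.GetOrInsertNode(&A);
  // B profiles identically to A, so the already-inserted node is returned.
  ConstNode *Canonical = Set.GetOrInsertNode(&B);
  (void)Canonical; // == &A
}
```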
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index 5f29236eac47..d443f9e21a47 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -5,18 +5,19 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This template implementation resides in a separate file so that it
-// does not get injected into every .cpp file that includes the
-// generic header.
-//
-// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO.
-//
-// This file should only be included by files that implement a
-// specialization of the relevant templates. Currently these are:
-// - CycleAnalysis.cpp
-// - MachineCycleAnalysis.cpp
-//
+///
+/// \file
+/// This template implementation resides in a separate file so that it
+/// does not get injected into every .cpp file that includes the
+/// generic header.
+///
+/// DO NOT INCLUDE THIS FILE WHEN MERELY USING CYCLEINFO.
+///
+/// This file should only be included by files that implement a
+/// specialization of the relevant templates. Currently these are:
+/// - CycleAnalysis.cpp
+/// - MachineCycleAnalysis.cpp
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICCYCLEIMPL_H
@@ -77,7 +78,7 @@ template <typename ContextT> class GenericCycleInfoCompute {
unsigned Start = 0; // DFS start; positive if block is found
unsigned End = 0; // DFS end
- DFSInfo() {}
+ DFSInfo() = default;
explicit DFSInfo(unsigned Start) : Start(Start) {}
/// Whether this node is an ancestor (or equal to) the node \p Other
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index 7768253e121d..d5f9cd9142ac 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
+///
/// \file
/// \brief Find all cycles in a control-flow graph, including irreducible loops.
///
@@ -22,7 +22,7 @@
/// unique cycle C which is a superset of L.
/// - In the absence of irreducible control flow, the cycles are
/// exactly the natural loops in the program.
-//
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICCYCLEINFO_H
diff --git a/llvm/include/llvm/ADT/GraphTraits.h b/llvm/include/llvm/ADT/GraphTraits.h
index 3ce91225d80d..3a7773592af3 100644
--- a/llvm/include/llvm/ADT/GraphTraits.h
+++ b/llvm/include/llvm/ADT/GraphTraits.h
@@ -5,13 +5,15 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the little GraphTraits<X> template class that should be
-// specialized by classes that want to be iteratable by generic graph iterators.
-//
-// This file also defines the marker class Inverse that is used to iterate over
-// graphs in a graph defined, inverse ordering...
-//
+///
+/// \file
+/// This file defines the little GraphTraits<X> template class that should be
+/// specialized by classes that want to be iteratable by generic graph
+/// iterators.
+///
+/// This file also defines the marker class Inverse that is used to iterate over
+/// graphs in a graph defined, inverse ordering...
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GRAPHTRAITS_H
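A sketch of the specialization the comment above asks graph owners to provide; `MyNode` is an illustrative type. Once this exists, the generic df_/bf_/po_ iterators work on the graph unchanged:

```
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"

struct MyNode {
  llvm::SmallVector<MyNode *, 4> Succs;
};

namespace llvm {
template <> struct GraphTraits<MyNode *> {
  using NodeRef = MyNode *;
  using ChildIteratorType = SmallVector<MyNode *, 4>::iterator;
  static NodeRef getEntryNode(MyNode *G) { return G; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // namespace llvm
```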
diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h
index cf27c5a16d28..23f82691825c 100644
--- a/llvm/include/llvm/ADT/ImmutableList.h
+++ b/llvm/include/llvm/ADT/ImmutableList.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImmutableList class.
-//
+///
+/// \file
+/// This file defines the ImmutableList class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLELIST_H
diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h
index f0e898cafaf9..c9351b3213dc 100644
--- a/llvm/include/llvm/ADT/ImmutableMap.h
+++ b/llvm/include/llvm/ADT/ImmutableMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImmutableMap class.
-//
+///
+/// \file
+/// This file defines the ImmutableMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLEMAP_H
diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h
index 8cef5acbafaa..b513fe9ec011 100644
--- a/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/llvm/include/llvm/ADT/ImmutableSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ImutAVLTree and ImmutableSet classes.
-//
+///
+/// \file
+/// This file defines the ImutAVLTree and ImmutableSet classes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_IMMUTABLESET_H
diff --git a/llvm/include/llvm/ADT/IndexedMap.h b/llvm/include/llvm/ADT/IndexedMap.h
index b44f16b91d76..5ac5f798269b 100644
--- a/llvm/include/llvm/ADT/IndexedMap.h
+++ b/llvm/include/llvm/ADT/IndexedMap.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements an indexed map. The index map template takes two
-// types. The first is the mapped type and the second is a functor
-// that maps its argument to a size_t. On instantiation a "null" value
-// can be provided to be used as a "does not exist" indicator in the
-// map. A member function grow() is provided that given the value of
-// the maximally indexed key (the argument of the functor) makes sure
-// the map has enough space for it.
-//
+///
+/// \file
+/// This file implements an indexed map. The index map template takes two
+/// types. The first is the mapped type and the second is a functor
+/// that maps its argument to a size_t. On instantiation a "null" value
+/// can be provided to be used as a "does not exist" indicator in the
+/// map. A member function grow() is provided that given the value of
+/// the maximally indexed key (the argument of the functor) makes sure
+/// the map has enough space for it.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INDEXEDMAP_H
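A minimal sketch of the null-value and grow() behavior described above (`indexedMapDemo` is an illustrative name):

```
#include "llvm/ADT/IndexedMap.h"

void indexedMapDemo() {
  // Keys are unsigned indices (the default identity functor); -1 is the
  // "does not exist" marker supplied at construction.
  llvm::IndexedMap<int> Map(-1);
  Map.grow(100); // make room for the largest key about to be used
  Map[42] = 7;
  bool Missing = (Map[41] == -1); // true: untouched slots hold the null value
  (void)Missing;
}
```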
diff --git a/llvm/include/llvm/ADT/IntEqClasses.h b/llvm/include/llvm/ADT/IntEqClasses.h
index 08f46a3079ef..84bb58cb736c 100644
--- a/llvm/include/llvm/ADT/IntEqClasses.h
+++ b/llvm/include/llvm/ADT/IntEqClasses.h
@@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Equivalence classes for small integers. This is a mapping of the integers
-// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
-//
-// Initially each integer has its own equivalence class. Classes are joined by
-// passing a representative member of each class to join().
-//
-// Once the classes are built, compress() will number them 0 .. M-1 and prevent
-// further changes.
-//
+///
+/// \file
+/// Equivalence classes for small integers. This is a mapping of the integers
+/// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+///
+/// Initially each integer has its own equivalence class. Classes are joined by
+/// passing a representative member of each class to join().
+///
+/// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+/// further changes.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INTEQCLASSES_H
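The join/compress lifecycle above in a short sketch (`intEqDemo` is an illustrative name):

```
#include "llvm/ADT/IntEqClasses.h"

void intEqDemo() {
  llvm::IntEqClasses EC(8); // integers 0..7, each in its own class
  EC.join(0, 3);
  EC.join(3, 5); // {0, 3, 5} now form one class
  EC.compress(); // renumber classes 0..M-1; further joins are not allowed
  bool Same = (EC[0] == EC[5]); // true: same class number
  (void)Same;
}
```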
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h
index 3c107a3622a9..368ed46f98d2 100644
--- a/llvm/include/llvm/ADT/IntervalMap.h
+++ b/llvm/include/llvm/ADT/IntervalMap.h
@@ -5,30 +5,31 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a coalescing interval map for small objects.
-//
-// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
-// same value are represented in a compressed form.
-//
-// Iterators provide ordered access to the compressed intervals rather than the
-// individual keys, and insert and erase operations use key intervals as well.
-//
-// Like SmallVector, IntervalMap will store the first N intervals in the map
-// object itself without any allocations. When space is exhausted it switches to
-// a B+-tree representation with very small overhead for small key and value
-// objects.
-//
-// A Traits class specifies how keys are compared. It also allows IntervalMap to
-// work with both closed and half-open intervals.
-//
-// Keys and values are not stored next to each other in a std::pair, so we don't
-// provide such a value_type. Dereferencing iterators only returns the mapped
-// value. The interval bounds are accessible through the start() and stop()
-// iterator methods.
-//
-// IntervalMap is optimized for small key and value objects, 4 or 8 bytes each
-// is the optimal size. For large objects use std::map instead.
+///
+/// \file
+/// This file implements a coalescing interval map for small objects.
+///
+/// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
+/// same value are represented in a compressed form.
+///
+/// Iterators provide ordered access to the compressed intervals rather than the
+/// individual keys, and insert and erase operations use key intervals as well.
+///
+/// Like SmallVector, IntervalMap will store the first N intervals in the map
+/// object itself without any allocations. When space is exhausted it switches
+/// to a B+-tree representation with very small overhead for small key and
+/// value objects.
+///
+/// A Traits class specifies how keys are compared. It also allows IntervalMap
+/// to work with both closed and half-open intervals.
+///
+/// Keys and values are not stored next to each other in a std::pair, so we
+/// don't provide such a value_type. Dereferencing iterators only returns the
+/// mapped value. The interval bounds are accessible through the start() and
+/// stop() iterator methods.
+///
+/// IntervalMap is optimized for small key and value objects, 4 or 8 bytes
+/// each is the optimal size. For large objects use std::map instead.
//
//===----------------------------------------------------------------------===//
//
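A minimal sketch of the insert/iterate interface described above, under the default (closed-interval, fully coalescing) traits; `intervalMapDemo` is an illustrative name:

```
#include "llvm/ADT/IntervalMap.h"

void intervalMapDemo() {
  // The allocator must outlive the map; adjacent equal-valued intervals
  // coalesce automatically.
  llvm::IntervalMap<unsigned, char>::Allocator Alloc;
  llvm::IntervalMap<unsigned, char> Map(Alloc);
  Map.insert(10, 20, 'a'); // [10,20] -> 'a'
  Map.insert(21, 30, 'a'); // coalesces with the above into [10,30] -> 'a'
  for (auto I = Map.begin(), E = Map.end(); I != E; ++I)
    (void)*I; // mapped value; bounds via I.start() and I.stop()
}
```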
diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 9715c9d01b98..975535bb5676 100644
--- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -5,51 +5,56 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and
-// IntrusiveRefCntPtr classes.
-//
-// IntrusiveRefCntPtr is a smart pointer to an object which maintains a
-// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a
-// refcount member variable and methods for updating the refcount. An object
-// that inherits from (ThreadSafe)RefCountedBase deletes itself when its
-// refcount hits zero.
-//
-// For example:
-//
-// class MyClass : public RefCountedBase<MyClass> {};
-//
-// void foo() {
-// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by
-// // 1 (from 0 in this case).
-// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
-//
-// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
-// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
-//
-// // Constructing an IntrusiveRefCntPtr has no effect on the object's
-// // refcount. After a move, the moved-from pointer is null.
-// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1));
-// assert(Ptr1 == nullptr);
-//
-// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1.
-// Ptr2.reset();
-//
-// // The object deletes itself when we return from the function, because
-// // Ptr3's destructor decrements its refcount to 0.
-// }
-//
-// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.:
-//
-// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass());
-// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required
-//
-// IntrusiveRefCntPtr works with any class that
-//
-// - inherits from (ThreadSafe)RefCountedBase,
-// - has Retain() and Release() methods, or
-// - specializes IntrusiveRefCntPtrInfo.
-//
+///
+/// \file
+/// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and
+/// IntrusiveRefCntPtr classes.
+///
+/// IntrusiveRefCntPtr is a smart pointer to an object which maintains a
+/// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a
+/// refcount member variable and methods for updating the refcount. An object
+/// that inherits from (ThreadSafe)RefCountedBase deletes itself when its
+/// refcount hits zero.
+///
+/// For example:
+///
+/// ```
+/// class MyClass : public RefCountedBase<MyClass> {};
+///
+/// void foo() {
+/// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount
+/// // by 1 (from 0 in this case).
+/// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
+///
+/// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
+/// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
+///
+/// // Move-constructing an IntrusiveRefCntPtr has no effect on the object's
+/// // refcount. After a move, the moved-from pointer is null.
+/// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1));
+/// assert(Ptr1 == nullptr);
+///
+/// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1.
+/// Ptr2.reset();
+///
+/// // The object deletes itself when we return from the function, because
+/// // Ptr3's destructor decrements its refcount to 0.
+/// }
+/// ```
+///
+/// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.:
+///
+/// ```
+/// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass());
+/// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required
+/// ```
+///
+/// IntrusiveRefCntPtr works with any class that
+///
+/// - inherits from (ThreadSafe)RefCountedBase,
+/// - has Retain() and Release() methods, or
+/// - specializes IntrusiveRefCntPtrInfo.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_INTRUSIVEREFCNTPTR_H
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index d281166b3e19..c4e5c7e2bac5 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -5,12 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a map that provides insertion order iteration. The
-// interface is purposefully minimal. The key is assumed to be cheap to copy
-// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in
-// a std::vector.
-//
+///
+/// \file
+/// This file implements a map that provides insertion order iteration. The
+/// interface is purposefully minimal. The key is assumed to be cheap to copy
+/// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in
+/// a std::vector.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_MAPVECTOR_H
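A short illustrative sketch of the insertion-order guarantee:

```
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

void mapVectorExample() {
  MapVector<StringRef, int> MV;
  MV.insert({"b", 2});
  MV.insert({"a", 1});
  MV["c"] = 3;

  // Iteration visits entries in insertion order: b, a, c (not sorted order).
  for (auto &KV : MV)
    (void)KV;
}
```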
diff --git a/llvm/include/llvm/ADT/None.h b/llvm/include/llvm/ADT/None.h
index 004ca0ac50ac..1a66be4097df 100644
--- a/llvm/include/llvm/ADT/None.h
+++ b/llvm/include/llvm/ADT/None.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides None, an enumerator for use in implicit constructors
-// of various (usually templated) types to make such construction more
-// terse.
-//
+///
+/// \file
+/// This file provides None, an enumerator for use in implicit constructors
+/// of various (usually templated) types to make such construction more
+/// terse.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_NONE_H
diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h
index 7d6b3e92f6b2..e047b0fc6514 100644
--- a/llvm/include/llvm/ADT/Optional.h
+++ b/llvm/include/llvm/ADT/Optional.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file provides Optional, a template class modeled in the spirit of
-// OCaml's 'opt' variant. The idea is to strongly type whether or not
-// a value can be optional.
-//
+///
+/// \file
+/// This file provides Optional, a template class modeled in the spirit of
+/// OCaml's 'opt' variant. The idea is to strongly type whether or not
+/// a value can be optional.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_OPTIONAL_H
@@ -241,7 +242,7 @@ template <typename T> class Optional {
public:
using value_type = T;
- constexpr Optional() {}
+ constexpr Optional() = default;
constexpr Optional(NoneType) {}
constexpr Optional(const T &y) : Storage(in_place, y) {}
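A minimal sketch of the strong typing described above (parseDigit is a made-up helper):

```
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
using namespace llvm;

Optional<int> parseDigit(char C) {
  if (C < '0' || C > '9')
    return None;  // Explicitly "no value".
  return C - '0'; // Implicitly wraps the value.
}

void optionalExample() {
  if (Optional<int> D = parseDigit('7'))
    (void)*D; // Dereference only when a value is present.
}
```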
diff --git a/llvm/include/llvm/ADT/PackedVector.h b/llvm/include/llvm/ADT/PackedVector.h
index ae7f8cc85743..b448685ab616 100644
--- a/llvm/include/llvm/ADT/PackedVector.h
+++ b/llvm/include/llvm/ADT/PackedVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the PackedVector class.
-//
+///
+/// \file
+/// This file implements the PackedVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_PACKEDVECTOR_H
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index 393ace6b70fc..b7ddf8855605 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PointerIntPair class.
-//
+///
+/// \file
+/// This file defines the PointerIntPair class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POINTERINTPAIR_H
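For readers new to the class, a minimal illustrative sketch; the integer is packed into the unused low bits of the aligned pointer:

```
#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

void pointerIntPairExample(int *P) {
  // One bool stashed in the low bit of the pointer; still one word total.
  PointerIntPair<int *, 1, bool> PIP(P, true);
  int *Ptr = PIP.getPointer();
  bool Flag = PIP.getInt();
  PIP.setInt(false);
  (void)Ptr; (void)Flag;
}
```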
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index 5ce2dbee4b3a..04d566bbc75e 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PointerUnion class, which is a discriminated union of
-// pointer types.
-//
+///
+/// \file
+/// This file defines the PointerUnion class, which is a discriminated union of
+/// pointer types.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POINTERUNION_H
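A minimal illustrative sketch of the discriminated-union interface:

```
#include "llvm/ADT/PointerUnion.h"
using namespace llvm;

void pointerUnionExample(int *I, float *F) {
  PointerUnion<int *, float *> P = I; // The discriminator lives in spare bits.
  if (P.is<int *>())
    (void)P.get<int *>();             // Checked access to the active member.
  if (float *FP = P.dyn_cast<float *>())
    (void)FP;                         // Null here, since P holds an int *.
  P = F;                              // Re-assignment switches the tag.
}
```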
diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index 74314d39d825..d0366045fa09 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file builds on the ADT/GraphTraits.h file to build a generic graph
-// post order iterator. This should work over any graph type that has a
-// GraphTraits specialization.
-//
+///
+/// \file
+/// This file builds on the ADT/GraphTraits.h file to provide a generic graph
+/// post order iterator. This should work over any graph type that has a
+/// GraphTraits specialization.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_POSTORDERITERATOR_H
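A minimal sketch over LLVM's own CFG, which already provides the required GraphTraits specialization:

```
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
using namespace llvm;

void postOrderExample(Function &F) {
  // Successors are visited before their predecessors (ignoring back edges).
  for (BasicBlock *BB : post_order(&F))
    (void)BB;
}
```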
diff --git a/llvm/include/llvm/ADT/PriorityQueue.h b/llvm/include/llvm/ADT/PriorityQueue.h
index cf79ee10ba7f..f40c160f0f5e 100644
--- a/llvm/include/llvm/ADT/PriorityQueue.h
+++ b/llvm/include/llvm/ADT/PriorityQueue.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the PriorityQueue class.
-//
+///
+/// \file
+/// This file defines the PriorityQueue class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_PRIORITYQUEUE_H
diff --git a/llvm/include/llvm/ADT/STLArrayExtras.h b/llvm/include/llvm/ADT/STLArrayExtras.h
new file mode 100644
index 000000000000..5b666641580e
--- /dev/null
+++ b/llvm/include/llvm/ADT/STLArrayExtras.h
@@ -0,0 +1,35 @@
+//===- llvm/ADT/STLArrayExtras.h - additions to <array> ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some templates that are useful when working with
+// C-style arrays.
+//
+// No library is required when using these functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STLARRAYEXTRAS_H
+#define LLVM_ADT_STLARRAYEXTRAS_H
+
+#include <cstddef>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Extra additions for arrays
+//===----------------------------------------------------------------------===//
+
+/// Find the length of an array.
+template <class T, std::size_t N>
+constexpr inline size_t array_lengthof(T (&)[N]) {
+ return N;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_STLARRAYEXTRAS_H
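Typical usage of the relocated helper (illustrative):

```
#include "llvm/ADT/STLArrayExtras.h"

static const char *const Names[] = {"add", "sub", "mul"};

void arrayLengthofExample() {
  // The length is deduced from the array type at compile time.
  constexpr size_t N = llvm::array_lengthof(Names);
  static_assert(N == 3, "three names");
}
```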
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index c3200c926518..e2972f4f902a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -5,21 +5,23 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains some templates that are useful if you are working with the
-// STL at all.
-//
-// No library is required when using these functions.
-//
+///
+/// \file
+/// This file contains some templates that are useful if you are working with
+/// the STL at all.
+///
+/// No library is required when using these functions.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STLEXTRAS_H
#define LLVM_ADT_STLEXTRAS_H
-#include "llvm/ADT/identity.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/identity.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/abi-breaking.h"
@@ -1410,7 +1412,7 @@ constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) {
}
//===----------------------------------------------------------------------===//
-// Extra additions for arrays
+// Extra additions to <algorithm>
//===----------------------------------------------------------------------===//
// We have a copy here so that LLVM behaves the same when using different
@@ -1430,12 +1432,6 @@ void shuffle(Iterator first, Iterator last, RNG &&g) {
}
}
-/// Find the length of an array.
-template <class T, std::size_t N>
-constexpr inline size_t array_lengthof(T (&)[N]) {
- return N;
-}
-
/// Adapt std::less<T> for array_pod_sort.
template<typename T>
inline int array_pod_sort_comparator(const void *P1, const void *P2) {
@@ -1563,10 +1559,6 @@ inline void sort(Container &&C, Compare Comp) {
llvm::sort(adl_begin(C), adl_end(C), Comp);
}
-//===----------------------------------------------------------------------===//
-// Extra additions to <algorithm>
-//===----------------------------------------------------------------------===//
-
/// Get the size of a range. This is a wrapper function around std::distance
/// which is only enabled when the operation is O(1).
template <typename R>
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 440b29df260c..0aa577d3ee1a 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -5,12 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains library features backported from future STL versions.
-//
-// These should be replaced with their STL counterparts as the C++ version LLVM
-// is compiled with is updated.
-//
+///
+/// \file
+/// This file contains library features backported from future STL versions.
+///
+/// These should be replaced with their STL counterparts as LLVM adopts newer
+/// versions of the C++ standard.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STLFORWARDCOMPAT_H
diff --git a/llvm/include/llvm/ADT/ScopeExit.h b/llvm/include/llvm/ADT/ScopeExit.h
index 61618818bae5..7f013f3f7979 100644
--- a/llvm/include/llvm/ADT/ScopeExit.h
+++ b/llvm/include/llvm/ADT/ScopeExit.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the make_scope_exit function, which executes user-defined
-// cleanup logic at scope exit.
-//
+///
+/// \file
+/// This file defines the make_scope_exit function, which executes user-defined
+/// cleanup logic at scope exit.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SCOPEEXIT_H
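A minimal illustrative sketch of make_scope_exit:

```
#include "llvm/ADT/ScopeExit.h"
#include <cstdio>

void scopeExitExample() {
  FILE *F = std::fopen("/tmp/demo.txt", "w");
  if (!F)
    return;
  // The lambda runs when Close goes out of scope, on every exit path.
  auto Close = llvm::make_scope_exit([&] { std::fclose(F); });
  std::fputs("hello\n", F);
}
```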
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index 3e30b6bb83d3..c9462f077dc8 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines generic set operations that may be used on set's of
-// different types, and different element types.
-//
+///
+/// \file
+/// This file defines generic set operations that may be used on sets of
+/// different types and with different element types.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SETOPERATIONS_H
diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h
index 82d5e98afb5d..08cf42f0b210 100644
--- a/llvm/include/llvm/ADT/SetVector.h
+++ b/llvm/include/llvm/ADT/SetVector.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a set that has insertion order iteration
-// characteristics. This is useful for keeping a set of things that need to be
-// visited later but in a deterministic order (insertion order). The interface
-// is purposefully minimal.
-//
-// This file defines SetVector and SmallSetVector, which performs no allocations
-// if the SetVector has less than a certain number of elements.
-//
+///
+/// \file
+/// This file implements a set that has insertion order iteration
+/// characteristics. This is useful for keeping a set of things that need to be
+/// visited later but in a deterministic order (insertion order). The interface
+/// is purposefully minimal.
+///
+/// This file defines SetVector and SmallSetVector; the latter performs no
+/// allocations if it holds fewer than a certain number of elements.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SETVECTOR_H
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index 17be317a10d7..86e304cc6c02 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the SmallBitVector class.
-//
+///
+/// \file
+/// This file implements the SmallBitVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLBITVECTOR_H
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 981b741669b0..ef6dae68b4a6 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallPtrSet class. See the doxygen comment for
-// SmallPtrSetImplBase for more details on the algorithm used.
+///
+/// \file
+/// This file defines the SmallPtrSet class. See the doxygen comment for
+/// SmallPtrSetImplBase for more details on the algorithm used.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index fe4f74eac85d..0eed85449c9d 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallSet class.
-//
+///
+/// \file
+/// This file defines the SmallSet class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLSET_H
diff --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h
index 81243af1f97d..874968f0a13f 100644
--- a/llvm/include/llvm/ADT/SmallString.h
+++ b/llvm/include/llvm/ADT/SmallString.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallString class.
-//
+///
+/// \file
+/// This file defines the SmallString class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLSTRING_H
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 466acb83d466..a4a790323a6b 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SmallVector class.
-//
+///
+/// \file
+/// This file defines the SmallVector class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SMALLVECTOR_H
@@ -567,6 +568,16 @@ protected:
explicit SmallVectorImpl(unsigned N)
: SmallVectorTemplateBase<T>(N) {}
+ void assignRemote(SmallVectorImpl &&RHS) {
+ this->destroy_range(this->begin(), this->end());
+ if (!this->isSmall())
+ free(this->begin());
+ this->BeginX = RHS.BeginX;
+ this->Size = RHS.Size;
+ this->Capacity = RHS.Capacity;
+ RHS.resetToSmall();
+ }
+
public:
SmallVectorImpl(const SmallVectorImpl &) = delete;
@@ -1031,12 +1042,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// If the RHS isn't small, clear this vector and then steal its buffer.
if (!RHS.isSmall()) {
- this->destroy_range(this->begin(), this->end());
- if (!this->isSmall()) free(this->begin());
- this->BeginX = RHS.BeginX;
- this->Size = RHS.Size;
- this->Capacity = RHS.Capacity;
- RHS.resetToSmall();
+ this->assignRemote(std::move(RHS));
return *this;
}
@@ -1227,7 +1233,20 @@ public:
}
SmallVector &operator=(SmallVector &&RHS) {
- SmallVectorImpl<T>::operator=(::std::move(RHS));
+ if (N) {
+ SmallVectorImpl<T>::operator=(::std::move(RHS));
+ return *this;
+ }
+    // SmallVectorImpl<T>::operator= does not leverage N == 0, so optimize
+    // that case here.
+ if (this == &RHS)
+ return *this;
+ if (RHS.empty()) {
+ this->destroy_range(this->begin(), this->end());
+ this->Size = 0;
+ } else {
+ this->assignRemote(std::move(RHS));
+ }
return *this;
}
diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h
index 12850e14f4ed..a591896521ce 100644
--- a/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/llvm/include/llvm/ADT/SparseBitVector.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseBitVector class. See the doxygen comment for
-// SparseBitVector for more details on the algorithm used.
-//
+///
+/// \file
+/// This file defines the SparseBitVector class. See the doxygen comment for
+/// SparseBitVector for more details on the algorithm used.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSEBITVECTOR_H
diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h
index f63cef936433..ef2a5ea5ed71 100644
--- a/llvm/include/llvm/ADT/SparseMultiSet.h
+++ b/llvm/include/llvm/ADT/SparseMultiSet.h
@@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseMultiSet class, which adds multiset behavior to
-// the SparseSet.
-//
-// A sparse multiset holds a small number of objects identified by integer keys
-// from a moderately sized universe. The sparse multiset uses more memory than
-// other containers in order to provide faster operations. Any key can map to
-// multiple values. A SparseMultiSetNode class is provided, which serves as a
-// convenient base class for the contents of a SparseMultiSet.
-//
+///
+/// \file
+/// This file defines the SparseMultiSet class, which adds multiset behavior to
+/// the SparseSet.
+///
+/// A sparse multiset holds a small number of objects identified by integer keys
+/// from a moderately sized universe. The sparse multiset uses more memory than
+/// other containers in order to provide faster operations. Any key can map to
+/// multiple values. A SparseMultiSetNode class is provided, which serves as a
+/// convenient base class for the contents of a SparseMultiSet.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSEMULTISET_H
diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h
index e66d76ad88e1..5c7087b1bffe 100644
--- a/llvm/include/llvm/ADT/SparseSet.h
+++ b/llvm/include/llvm/ADT/SparseSet.h
@@ -5,15 +5,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the SparseSet class derived from the version described in
-// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
-// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
-//
-// A sparse set holds a small number of objects identified by integer keys from
-// a moderately sized universe. The sparse set uses more memory than other
-// containers in order to provide faster operations.
-//
+///
+/// \file
+/// This file defines the SparseSet class derived from the version described in
+/// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
+/// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
+///
+/// A sparse set holds a small number of objects identified by integer keys from
+/// a moderately sized universe. The sparse set uses more memory than other
+/// containers in order to provide faster operations.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_SPARSESET_H
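A minimal illustrative sketch; note that setUniverse() must be called before inserting keys:

```
#include "llvm/ADT/SparseSet.h"
using namespace llvm;

void sparseSetExample() {
  SparseSet<unsigned> Set;
  Set.setUniverse(64); // Keys must be < 64; sizes the sparse array.
  Set.insert(5);
  Set.insert(17);
  if (Set.count(5))
    Set.erase(Set.find(5));
  Set.clear(); // Cost is proportional to the elements, not the universe.
}
```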
diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h
index 528d2cdcf61b..c39e161bcbcd 100644
--- a/llvm/include/llvm/ADT/Statistic.h
+++ b/llvm/include/llvm/ADT/Statistic.h
@@ -5,21 +5,22 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the 'Statistic' class, which is designed to be an easy way
-// to expose various metrics from passes. These statistics are printed at the
-// end of a run (from llvm_shutdown), when the -stats command line option is
-// passed on the command line.
-//
-// This is useful for reporting information like the number of instructions
-// simplified, optimized or removed by various transformations, like this:
-//
-// static Statistic NumInstsKilled("gcse", "Number of instructions killed");
-//
-// Later, in the code: ++NumInstsKilled;
-//
-// NOTE: Statistics *must* be declared as global variables.
-//
+///
+/// \file
+/// This file defines the 'Statistic' class, which is designed to be an easy way
+/// to expose various metrics from passes. These statistics are printed at the
+/// end of a run (from llvm_shutdown), when the -stats command line option is
+/// passed on the command line.
+///
+/// This is useful for reporting information such as the number of instructions
+/// simplified, optimized or removed by various transformations, for example:
+///
+/// static Statistic NumInstsKilled("gcse", "Number of instructions killed");
+///
+/// Later, in the code: ++NumInstsKilled;
+///
+/// NOTE: Statistics *must* be declared as global variables.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STATISTIC_H
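In practice most passes declare statistics through the STATISTIC macro from this header rather than spelling out the class; a minimal sketch (DEBUG_TYPE and the counter name are made up):

```
#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "gcse"

STATISTIC(NumInstsKilled, "Number of instructions killed");

void noteInstructionKilled() {
  ++NumInstsKilled; // Printed at shutdown when -stats is passed.
}
```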
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 81a0954226d6..ee6c33924e96 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file contains some functions that are useful when dealing with strings.
-//
+///
+/// \file
+/// This file contains some functions that are useful when dealing with strings.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGEXTRAS_H
@@ -148,13 +149,14 @@ inline char toUpper(char x) {
return x;
}
-inline std::string utohexstr(uint64_t X, bool LowerCase = false) {
+inline std::string utohexstr(uint64_t X, bool LowerCase = false,
+ unsigned Width = 0) {
char Buffer[17];
char *BufPtr = std::end(Buffer);
if (X == 0) *--BufPtr = '0';
- while (X) {
+ for (unsigned i = 0; Width ? (i < Width) : X; ++i) {
unsigned char Mod = static_cast<unsigned char>(X) & 15;
*--BufPtr = hexdigit(Mod, LowerCase);
X >>= 4;
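The new Width parameter changes the loop's stopping condition: when Width is nonzero, exactly Width digits are emitted, zero-padded once X runs out; when Width is zero the old behavior is preserved. A quick illustrative sketch:

```
#include "llvm/ADT/StringExtras.h"
#include <cassert>

void utohexstrExample() {
  // Old behavior: as many digits as needed, uppercase by default.
  assert(llvm::utohexstr(0xABCu) == "ABC");
  // New behavior: a fixed number of digits, zero-padded.
  assert(llvm::utohexstr(0xABCu, /*LowerCase=*/true, /*Width=*/8) ==
         "00000abc");
}
```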
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index 562a2ff1a192..23248093c67e 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the StringMap class.
-//
+///
+/// \file
+/// This file defines the StringMap class.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGMAP_H
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 120d4f3ca4bc..6e13c8618c85 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the StringMapEntry class - it is intended to be a low
-// dependency implementation detail of StringMap that is more suitable for
-// inclusion in public headers than StringMap.h itself is.
-//
+///
+/// \file
+/// This file defines the StringMapEntry class - it is intended to be a low
+/// dependency implementation detail of StringMap that is more suitable for
+/// inclusion in public headers than StringMap.h itself is.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGMAPENTRY_H
diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h
index c4245175544b..4a499463d983 100644
--- a/llvm/include/llvm/ADT/StringSet.h
+++ b/llvm/include/llvm/ADT/StringSet.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// StringSet - A set-like wrapper for the StringMap.
-//
+///
+/// \file
+/// StringSet - A set-like wrapper for the StringMap.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_STRINGSET_H
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index 4b7882d7ca10..95ab1df8d297 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -4,10 +4,11 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
-//
-// This file implements the StringSwitch template, which mimics a switch()
-// statement whose cases are string literals.
-//
+///
+/// \file
+/// This file implements the StringSwitch template, which mimics a switch()
+/// statement whose cases are string literals.
+///
//===----------------------------------------------------------------------===/
#ifndef LLVM_ADT_STRINGSWITCH_H
#define LLVM_ADT_STRINGSWITCH_H
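A minimal illustrative sketch (parseColor is a made-up helper):

```
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

int parseColor(StringRef S) {
  return StringSwitch<int>(S)
      .Case("red", 0)
      .Case("green", 1)
      .Cases("blue", "azure", 2) // Several strings mapping to one result.
      .Default(-1);
}
```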
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 0f0a7b08b5d3..42277c013035 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -721,6 +721,41 @@ public:
isOSBinFormatELF();
}
+ /// Tests whether the target is T32.
+ bool isArmT32() const {
+ switch (getSubArch()) {
+ case Triple::ARMSubArch_v8m_baseline:
+ case Triple::ARMSubArch_v7s:
+ case Triple::ARMSubArch_v7k:
+ case Triple::ARMSubArch_v7ve:
+ case Triple::ARMSubArch_v6:
+ case Triple::ARMSubArch_v6m:
+ case Triple::ARMSubArch_v6k:
+ case Triple::ARMSubArch_v6t2:
+ case Triple::ARMSubArch_v5:
+ case Triple::ARMSubArch_v5te:
+ case Triple::ARMSubArch_v4t:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ /// Tests whether the target is an M-class.
+ bool isArmMClass() const {
+ switch (getSubArch()) {
+ case Triple::ARMSubArch_v6m:
+ case Triple::ARMSubArch_v7m:
+ case Triple::ARMSubArch_v7em:
+ case Triple::ARMSubArch_v8m_mainline:
+ case Triple::ARMSubArch_v8m_baseline:
+ case Triple::ARMSubArch_v8_1m_mainline:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// Tests whether the target is AArch64 (little and big endian).
bool isAArch64() const {
return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be ||
diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 3b7598f3251d..892a7d43b317 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeSwitch template, which mimics a switch()
-// statement whose cases are type names.
-//
+///
+/// \file
+/// This file implements the TypeSwitch template, which mimics a switch()
+/// statement whose cases are type names.
+///
//===-----------------------------------------------------------------------===/
#ifndef LLVM_ADT_TYPESWITCH_H
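A minimal sketch against LLVM's instruction hierarchy, which already supports dyn_cast (classifyInst is a made-up helper):

```
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

StringRef classifyInst(Instruction *I) {
  return TypeSwitch<Instruction *, StringRef>(I)
      .Case<LoadInst>([](LoadInst *) { return StringRef("load"); })
      .Case<StoreInst>([](StoreInst *) { return StringRef("store"); })
      .Default([](Instruction *) { return StringRef("other"); });
}
```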
diff --git a/llvm/include/llvm/ADT/Waymarking.h b/llvm/include/llvm/ADT/Waymarking.h
deleted file mode 100644
index 2efbc6f05495..000000000000
--- a/llvm/include/llvm/ADT/Waymarking.h
+++ /dev/null
@@ -1,322 +0,0 @@
-//===- Waymarking.h - Array waymarking algorithm ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Utility to backtrace an array's head, from a pointer into it. For the
-// backtrace to work, we use "Waymarks", which are special tags embedded into
-// the array's elements.
-//
-// A Tag of n-bits (in size) is composed as follows:
-//
-// bits: | n-1 | n-2 ... 0 |
-// .---------.------------------------------------.
-// |Stop Mask|(2^(n-1))-ary numeric system - digit|
-// '---------'------------------------------------'
-//
-// Backtracing is done as follows:
-// Walk back (starting from a given pointer to an element into the array), until
-// a tag with a "Stop Mask" is reached. Then start calculating the "Offset" from
-// the array's head, by picking up digits along the way, until another stop is
-// reached. The "Offset" is then subtracted from the current pointer, and the
-// result is the array's head.
-// A special case - if we first encounter a Tag with a Stop and a zero digit,
-// then this is already the head.
-//
-// For example:
-// In case of 2 bits:
-//
-// Tags:
-// x0 - binary digit 0
-// x1 - binary digit 1
-// 1x - stop and calculate (s)
-//
-// Array:
-// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.
-// head -> |s0 |s1 | 0 |s1 | 0 | 0 |s1 | 1 | 1 |s1 | 0 | 1 | 0 |s1 | 0 | 1 |
-// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'
-// |-1 |-2 |-4 |-7 |-10 |-14
-// <_ | | | | | |
-// <_____ | | | | |
-// <_____________ | | | |
-// <_________________________ | | |
-// <_____________________________________ | |
-// <_____________________________________________________ |
-//
-//
-// In case of 3 bits:
-//
-// Tags:
-// x00 - quaternary digit 0
-// x01 - quaternary digit 1
-// x10 - quaternary digit 2
-// x11 - quaternary digit 3
-// 1xy - stop and calculate (s)
-//
-// Array:
-// .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.
-// head -> |s0 |s1 |s2 |s3 | 0 |s1 | 2 |s1 | 0 |s2 | 2 |s2 | 0 |s3 | 2 |s3 |
-// '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'
-// |-1 |-2 |-3 |-4 |-6 |-8 |-10 |-12 |-14 |-16
-// <_ | | | | | | | | | |
-// <_____ | | | | | | | | |
-// <_________ | | | | | | | |
-// <_____________ | | | | | | |
-// <_____________________ | | | | | |
-// <_____________________________ | | | | |
-// <_____________________________________ | | | |
-// <_____________________________________________ | | |
-// <_____________________________________________________ | |
-// <_____________________________________________________________ |
-//
-//
-// The API introduce 2 functions:
-// 1. fillWaymarks
-// 2. followWaymarks
-//
-// Example:
-// int N = 10;
-// int M = 5;
-// int **A = new int *[N + M]; // Define the array.
-// for (int I = 0; I < N + M; ++I)
-// A[I] = new int(I);
-//
-// fillWaymarks(A, A + N); // Set the waymarks for the first N elements
-// // of the array.
-// // Note that it must be done AFTER we fill
-// // the array's elements.
-//
-// ... // Elements which are not in the range
-// // [A, A+N) will not be marked, and we won't
-// // be able to call followWaymarks on them.
-//
-// ... // Elements which will be changed after the
-// // call to fillWaymarks, will have to be
-// // retagged.
-//
-// fillWaymarks(A + N, A + N + M, N); // Set the waymarks of the remaining M
-// // elements.
-// ...
-// int **It = A + N + 1;
-// int **B = followWaymarks(It); // Find the head of the array containing It.
-// assert(B == A);
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_WAYMARKING_H
-#define LLVM_ADT_WAYMARKING_H
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/PointerLikeTypeTraits.h"
-
-namespace llvm {
-
-namespace detail {
-
-template <unsigned NumBits> struct WaymarkingTraits {
- enum : unsigned {
- // The number of bits of a Waymarking Tag.
- NUM_BITS = NumBits,
-
- // A Tag is composed from a Mark and a Stop mask.
- MARK_SIZE = NUM_BITS - 1,
- STOP_MASK = (1 << MARK_SIZE),
- MARK_MASK = (STOP_MASK - 1),
- TAG_MASK = (MARK_MASK | STOP_MASK),
-
- // The number of pre-computed tags (for fast fill).
- NUM_STATIC_TAGS = 32
- };
-
-private:
- // Add a new tag, calculated from Count and Stop, to the Vals pack, while
- // continuing recursively to decrease Len down to 0.
- template <unsigned Len, bool Stop, unsigned Count, uint8_t... Vals>
- struct AddTag;
-
- // Delegate to the specialized AddTag according to the need of a Stop mask.
- template <unsigned Len, unsigned Count, uint8_t... Vals> struct GenTag {
- typedef
- typename AddTag<Len, (Count <= MARK_MASK), Count, Vals...>::Xdata Xdata;
- };
-
- // Start adding tags while calculating the next Count, which is actually the
- // number of already calculated tags (equivalent to the position in the
- // array).
- template <unsigned Len, uint8_t... Vals> struct GenOffset {
- typedef typename GenTag<Len, sizeof...(Vals), Vals...>::Xdata Xdata;
- };
-
- // Add the tag and remove it from Count.
- template <unsigned Len, unsigned Count, uint8_t... Vals>
- struct AddTag<Len, false, Count, Vals...> {
- typedef typename GenTag<Len - 1, (Count >> MARK_SIZE), Vals...,
- Count & MARK_MASK>::Xdata Xdata;
- };
-
- // We have reached the end of this Count, so start with a new Count.
- template <unsigned Len, unsigned Count, uint8_t... Vals>
- struct AddTag<Len, true, Count, Vals...> {
- typedef typename GenOffset<Len - 1, Vals...,
- (Count & MARK_MASK) | STOP_MASK>::Xdata Xdata;
- };
-
- template <unsigned Count, uint8_t... Vals> struct TagsData {
- // The remaining number for calculating the next tag, following the last one
- // in Values.
- static const unsigned Remain = Count;
-
- // The array of ordered pre-computed Tags.
- static const uint8_t Values[sizeof...(Vals)];
- };
-
- // Specialize the case when Len equals 0, as the recursion stop condition.
- template <unsigned Count, uint8_t... Vals>
- struct AddTag<0, false, Count, Vals...> {
- typedef TagsData<Count, Vals...> Xdata;
- };
-
- template <unsigned Count, uint8_t... Vals>
- struct AddTag<0, true, Count, Vals...> {
- typedef TagsData<Count, Vals...> Xdata;
- };
-
-public:
- typedef typename GenOffset<NUM_STATIC_TAGS>::Xdata Tags;
-};
-
-template <unsigned NumBits>
-template <unsigned Count, uint8_t... Vals>
-const uint8_t WaymarkingTraits<NumBits>::TagsData<
- Count, Vals...>::Values[sizeof...(Vals)] = {Vals...};
-
-} // end namespace detail
-
-/// This class is responsible for tagging (and retrieving the tag of) a given
-/// element of type T.
-template <class T, class WTraits = detail::WaymarkingTraits<
- PointerLikeTypeTraits<T>::NumLowBitsAvailable>>
-struct Waymarker {
- using Traits = WTraits;
- static void setWaymark(T &N, unsigned Tag) { N.setWaymark(Tag); }
- static unsigned getWaymark(const T &N) { return N.getWaymark(); }
-};
-
-template <class T, class WTraits> struct Waymarker<T *, WTraits> {
- using Traits = WTraits;
- static void setWaymark(T *&N, unsigned Tag) {
- reinterpret_cast<uintptr_t &>(N) |= static_cast<uintptr_t>(Tag);
- }
- static unsigned getWaymark(const T *N) {
- return static_cast<unsigned>(reinterpret_cast<uintptr_t>(N)) &
- Traits::TAG_MASK;
- }
-};
-
-/// Sets up the waymarking algorithm's tags for a given range [Begin, End).
-///
-/// \param Begin The beginning of the range to mark with tags (inclusive).
-/// \param End The ending of the range to mark with tags (exclusive).
-/// \param Offset The position in the supposed tags array from which to start
-/// marking the given range.
-template <class TIter, class Marker = Waymarker<
- typename std::iterator_traits<TIter>::value_type>>
-void fillWaymarks(TIter Begin, TIter End, size_t Offset = 0) {
- if (Begin == End)
- return;
-
- size_t Count = Marker::Traits::Tags::Remain;
- if (Offset <= Marker::Traits::NUM_STATIC_TAGS) {
- // Start by filling the pre-calculated tags, starting from the given offset.
- while (Offset != Marker::Traits::NUM_STATIC_TAGS) {
- Marker::setWaymark(*Begin, Marker::Traits::Tags::Values[Offset]);
-
- ++Offset;
- ++Begin;
-
- if (Begin == End)
- return;
- }
- } else {
- // The given offset is larger than the number of pre-computed tags, so we
- // must do it the hard way.
- // Calculate the next remaining Count, as if we have filled the tags up to
- // the given offset.
- size_t Off = Marker::Traits::NUM_STATIC_TAGS;
- do {
- ++Off;
-
- // If the count can fit into the tag, then the counting must stop.
- if (Count <= Marker::Traits::MARK_MASK) {
- Count = Off;
- } else
- Count >>= Marker::Traits::MARK_SIZE;
- } while (Off != Offset);
- }
-
- // By now, we have the matching remaining Count for the current offset.
- do {
- ++Offset;
-
- unsigned Tag = Count & Marker::Traits::MARK_MASK;
-
- // If the count can fit into the tag, then the counting must stop.
- if (Count <= Marker::Traits::MARK_MASK) {
- Tag |= Marker::Traits::STOP_MASK;
- Count = Offset;
- } else
- Count >>= Marker::Traits::MARK_SIZE;
-
- Marker::setWaymark(*Begin, Tag);
- ++Begin;
- } while (Begin != End);
-}
-
-/// Sets up the waymarking algorithm's tags for a given range.
-///
-/// \param Range The range to mark with tags.
-/// \param Offset The position in the supposed tags array from which to start
-/// marking the given range.
-template <typename R, class Marker = Waymarker<typename std::remove_reference<
- decltype(*std::begin(std::declval<R &>()))>::type>>
-void fillWaymarks(R &&Range, size_t Offset = 0) {
- return fillWaymarks<decltype(std::begin(std::declval<R &>())), Marker>(
- adl_begin(Range), adl_end(Range), Offset);
-}
-
-/// Retrieves the element marked with tag of only STOP_MASK, by following the
-/// waymarks. This is the first element in a range passed to a previous call to
-/// \c fillWaymarks with \c Offset 0.
-///
-/// For the trivial usage of calling \c fillWaymarks(Array), and \I is an
-/// iterator inside \c Array, this function retrieves the head of \c Array, by
-/// following the waymarks.
-///
-/// \param I The iterator into an array which was marked by the waymarking tags
-/// (by a previous call to \c fillWaymarks).
-template <class TIter, class Marker = Waymarker<
- typename std::iterator_traits<TIter>::value_type>>
-TIter followWaymarks(TIter I) {
- unsigned Tag;
- do
- Tag = Marker::getWaymark(*I--);
- while (!(Tag & Marker::Traits::STOP_MASK));
-
- // Special case for the first Use.
- if (Tag != Marker::Traits::STOP_MASK) {
- ptrdiff_t Offset = Tag & Marker::Traits::MARK_MASK;
- while (!((Tag = Marker::getWaymark(*I)) & Marker::Traits::STOP_MASK)) {
- Offset = (Offset << Marker::Traits::MARK_SIZE) + Tag;
- --I;
- }
- I -= Offset;
- }
- return ++I;
-}
-
-} // end namespace llvm
-
-#endif // LLVM_ADT_WAYMARKING_H
diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index d76bc6c6046c..49b27c89e5fe 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -5,9 +5,10 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the C++20 <bit> header.
-//
+///
+/// \file
+/// This file implements the C++20 <bit> header.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_BIT_H
diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h
index 4f5134008692..c480c1e7cd78 100644
--- a/llvm/include/llvm/ADT/edit_distance.h
+++ b/llvm/include/llvm/ADT/edit_distance.h
@@ -5,11 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines a Levenshtein distance function that works for any two
-// sequences, with each element of each sequence being analogous to a character
-// in a string.
-//
+///
+/// \file
+/// This file defines a Levenshtein distance function that works for any two
+/// sequences, with each element of each sequence being analogous to a character
+/// in a string.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_EDIT_DISTANCE_H
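A sketch of the generic entry point, ComputeEditDistance, applied to a non-string sequence (illustrative):

```
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/edit_distance.h"
#include <cassert>

void editDistanceExample() {
  int A[] = {1, 2, 3, 4};
  int B[] = {1, 3, 4, 5};
  // Delete the 2, then append the 5: two edits.
  unsigned D = llvm::ComputeEditDistance(llvm::makeArrayRef(A),
                                         llvm::makeArrayRef(B));
  assert(D == 2);
}
```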
diff --git a/llvm/include/llvm/ADT/ilist.h b/llvm/include/llvm/ADT/ilist.h
index b3aa26f2454d..9913b7cccbdd 100644
--- a/llvm/include/llvm/ADT/ilist.h
+++ b/llvm/include/llvm/ADT/ilist.h
@@ -5,19 +5,20 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines classes to implement an intrusive doubly linked list class
-// (i.e. each node of the list must contain a next and previous field for the
-// list.
-//
-// The ilist class itself should be a plug in replacement for list. This list
-// replacement does not provide a constant time size() method, so be careful to
-// use empty() when you really want to know if it's empty.
-//
-// The ilist class is implemented as a circular list. The list itself contains
-// a sentinel node, whose Next points at begin() and whose Prev points at
-// rbegin(). The sentinel node itself serves as end() and rend().
-//
+///
+/// \file
+/// This file defines classes to implement an intrusive doubly linked list class
+/// (i.e. each node of the list must contain a next and previous field for the
+/// list).
+///
+/// The ilist class itself should be a plug-in replacement for std::list. This list
+/// replacement does not provide a constant time size() method, so be careful to
+/// use empty() when you really want to know if it's empty.
+///
+/// The ilist class is implemented as a circular list. The list itself contains
+/// a sentinel node, whose Next points at begin() and whose Prev points at
+/// rbegin(). The sentinel node itself serves as end() and rend().
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ILIST_H
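A minimal illustrative sketch of the node/list pair this header and ilist_node.h provide; the ownership semantics are as I read them, with the list deleting any nodes it still holds:

```
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
using namespace llvm;

struct MyNode : ilist_node<MyNode> {
  int Value;
  explicit MyNode(int V) : Value(V) {}
};

void ilistExample() {
  ilist<MyNode> List;
  List.push_back(new MyNode(1)); // The list takes ownership.
  List.push_back(new MyNode(2));
  // Prefer empty() over size() == 0; size() is not constant time.
  if (!List.empty())
    (void)List.front().Value;
  List.clear(); // Deletes the remaining nodes.
}
```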
diff --git a/llvm/include/llvm/ADT/ilist_node.h b/llvm/include/llvm/ADT/ilist_node.h
index e040d9630a1e..7856b1c0d410 100644
--- a/llvm/include/llvm/ADT/ilist_node.h
+++ b/llvm/include/llvm/ADT/ilist_node.h
@@ -5,10 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the ilist_node class template, which is a convenient
-// base class for creating classes that can be used with ilists.
-//
+///
+/// \file
+/// This file defines the ilist_node class template, which is a convenient
+/// base class for creating classes that can be used with ilists.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ILIST_NODE_H
diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 043b1b7ca2dc..2dd2e7ca916d 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -26,6 +26,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class AAResults;
diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h
index e16b908d6a10..539d29eb5e9c 100644
--- a/llvm/include/llvm/Analysis/CycleAnalysis.h
+++ b/llvm/include/llvm/Analysis/CycleAnalysis.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/GenericCycleInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/SSAContext.h"
+#include "llvm/Pass.h"
namespace llvm {
extern template class GenericCycleInfo<SSAContext>;
diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h
index 4ea589ec7efc..c5107da2a017 100644
--- a/llvm/include/llvm/Analysis/DDG.h
+++ b/llvm/include/llvm/Analysis/DDG.h
@@ -53,7 +53,7 @@ public:
DDGNode() = delete;
DDGNode(const NodeKind K) : Kind(K) {}
- DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+ DDGNode(const DDGNode &N) = default;
DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
virtual ~DDGNode() = 0;
@@ -93,7 +93,7 @@ public:
RootDDGNode() : DDGNode(NodeKind::Root) {}
RootDDGNode(const RootDDGNode &N) = delete;
RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
- ~RootDDGNode() {}
+ ~RootDDGNode() = default;
/// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
static bool classof(const DDGNode *N) {
@@ -113,11 +113,7 @@ public:
SimpleDDGNode(SimpleDDGNode &&N);
~SimpleDDGNode();
- SimpleDDGNode &operator=(const SimpleDDGNode &N) {
- DDGNode::operator=(N);
- InstList = N.InstList;
- return *this;
- }
+ SimpleDDGNode &operator=(const SimpleDDGNode &N) = default;
SimpleDDGNode &operator=(SimpleDDGNode &&N) {
DDGNode::operator=(std::move(N));
@@ -179,11 +175,7 @@ public:
PiBlockDDGNode(PiBlockDDGNode &&N);
~PiBlockDDGNode();
- PiBlockDDGNode &operator=(const PiBlockDDGNode &N) {
- DDGNode::operator=(N);
- NodeList = N.NodeList;
- return *this;
- }
+ PiBlockDDGNode &operator=(const PiBlockDDGNode &N) = default;
PiBlockDDGNode &operator=(PiBlockDDGNode &&N) {
DDGNode::operator=(std::move(N));
@@ -231,11 +223,7 @@ public:
DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
- DDGEdge &operator=(const DDGEdge &E) {
- DDGEdgeBase::operator=(E);
- Kind = E.Kind;
- return *this;
- }
+ DDGEdge &operator=(const DDGEdge &E) = default;
DDGEdge &operator=(DDGEdge &&E) {
DDGEdgeBase::operator=(std::move(E));
@@ -272,7 +260,7 @@ public:
: Name(N), DI(DepInfo), Root(nullptr) {}
DependenceGraphInfo(DependenceGraphInfo &&G)
: Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
- virtual ~DependenceGraphInfo() {}
+ virtual ~DependenceGraphInfo() = default;
/// Return the label that is used to name this graph.
StringRef getName() const { return Name; }
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 8c852e85b04a..638f4869d677 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -76,7 +76,7 @@ namespace llvm {
public:
Dependence(Instruction *Source, Instruction *Destination)
: Src(Source), Dst(Destination) {}
- virtual ~Dependence() {}
+ virtual ~Dependence() = default;
/// Dependence::DVEntry - Each level in the distance/direction vector
/// has a direction (or perhaps a union of several directions), and
diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
index 332829cbc8a9..e0dbdcdaa749 100644
--- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -43,7 +43,7 @@ public:
AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
const BasicBlockListType &BBs)
: Graph(G), DI(D), BBList(BBs) {}
- virtual ~AbstractDependenceGraphBuilder() {}
+ virtual ~AbstractDependenceGraphBuilder() = default;
/// The main entry to the graph construction algorithm. It starts by
/// creating nodes in increasing order of granularity and then
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 7b81d5754930..90ab2833e428 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -262,7 +262,20 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()),
llvm::hash_value(ID.getPredicate()),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
- else if (isa<CallInst>(ID.Inst)) {
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
+    // To hash intrinsics, we use the opcode and types like the other
+    // instructions, but also the intrinsic ID and the name of the
+    // intrinsic.
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ return llvm::hash_combine(
+ llvm::hash_value(ID.Inst->getOpcode()),
+ llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
+ llvm::hash_value(*ID.CalleeName),
+ llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+ }
+
+ if (isa<CallInst>(ID.Inst)) {
std::string FunctionName = *ID.CalleeName;
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
@@ -270,6 +283,7 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
}
+
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
llvm::hash_value(ID.Inst->getType()),
@@ -499,7 +513,7 @@ struct IRInstructionMapper {
/// be analyzed for similarity.
struct InstructionClassification
: public InstVisitor<InstructionClassification, InstrType> {
- InstructionClassification() {}
+ InstructionClassification() = default;
// TODO: Determine a scheme to resolve when the label is similar enough.
InstrType visitBranchInst(BranchInst &BI) {
@@ -525,8 +539,17 @@ struct IRInstructionMapper {
// analyzed for similarity as it has no bearing on the outcome of the
// program.
InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
- // TODO: Handle specific intrinsics.
- InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+ InstrType visitIntrinsicInst(IntrinsicInst &II) {
+ // These are disabled due to complications in the CodeExtractor when
+    // outlining these instructions. For instance, it is unclear what we
+    // should do when moving only the start or end lifetime instruction into
+    // an outlined function. Also, assume-like intrinsics could be removed
+    // from the region, removing arguments and causing discrepancies in the
+    // number of inputs between different regions.
+ if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+ return Illegal;
+ return EnableIntrinsics ? Legal : Illegal;
+ }
// We only allow call instructions where the function has a name and
// is not an indirect call.
InstrType visitCallInst(CallInst &CI) {
@@ -553,6 +576,10 @@ struct IRInstructionMapper {
// The flag variable that lets the classifier know whether we should
// allow indirect calls to be considered legal instructions.
bool EnableIndirectCalls = false;
+
+ // Flag that lets the classifier know whether we should allow intrinsics to
+ // be checked for similarity.
+ bool EnableIntrinsics = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -939,10 +966,12 @@ class IRSimilarityIdentifier {
public:
IRSimilarityIdentifier(bool MatchBranches = true,
bool MatchIndirectCalls = true,
- bool MatchCallsWithName = false)
+ bool MatchCallsWithName = false,
+ bool MatchIntrinsics = true)
: Mapper(&InstDataAllocator, &InstDataListAllocator),
EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
- EnableMatchingCallsByName(MatchCallsWithName) {}
+ EnableMatchingCallsByName(MatchCallsWithName),
+ EnableIntrinsics(MatchIntrinsics) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
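
[Editor's note: a hedged sketch of the widened constructor; the flag values
are illustrative.]

// All four matching dimensions spelled out; MatchIntrinsics is the new one
// and defaults to true when omitted.
IRSimilarityIdentifier Identifier(/*MatchBranches=*/true,
                                  /*MatchIndirectCalls=*/true,
                                  /*MatchCallsWithName=*/false,
                                  /*MatchIntrinsics=*/false);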
@@ -1031,6 +1060,10 @@ private:
/// convention, attributes and type signature.
bool EnableMatchingCallsByName = true;
+ /// The flag variable that marks whether we should check intrinsics for
+ /// similarity.
+ bool EnableIntrinsics = true;
+
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index eb72f2c5d14d..0825e19ecd2d 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -19,7 +19,7 @@ namespace llvm {
// Visitor class that finds all indirect calls.
struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
std::vector<CallBase *> IndirectCalls;
- PGOIndirectCallVisitor() {}
+ PGOIndirectCallVisitor() = default;
void visitCallBase(CallBase &Call) {
if (Call.isIndirectCall())
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
index feefa9b9ddd1..84252bcf1b06 100644
--- a/llvm/include/llvm/Analysis/InlineOrder.h
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -26,7 +26,7 @@ public:
using reference = T &;
using const_reference = const T &;
- virtual ~InlineOrder() {}
+ virtual ~InlineOrder() = default;
virtual size_t size() = 0;
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index eb8f66bada59..c0404d37d04d 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -1203,7 +1203,7 @@ private:
}
};
-inline LazyCallGraph::Edge::Edge() {}
+inline LazyCallGraph::Edge::Edge() = default;
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
inline LazyCallGraph::Edge::operator bool() const {
diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index 57f732cc854b..754391e10630 100644
--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -38,7 +38,7 @@ class LazyValueInfo {
void operator=(const LazyValueInfo&) = delete;
public:
~LazyValueInfo();
- LazyValueInfo() {}
+ LazyValueInfo() = default;
LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_,
TargetLibraryInfo *TLI_)
: AC(AC_), DL(DL_), TLI(TLI_) {}
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 3db501c51a17..09bf98d324ed 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -42,8 +42,7 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- MaybeAlign Alignment,
- const DataLayout &DL,
+ Align Alignment, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr,
const TargetLibraryInfo *TLI = nullptr);
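
[Editor's note: a hedged call-site sketch. With the MaybeAlign parameter gone,
callers must supply a concrete alignment; Ptr, LoadTy, DL, and I are assumed
to be in scope.]

bool CanSpeculate = isDereferenceableAndAlignedPointer(
    Ptr, LoadTy, Align(4), DL, /*CtxI=*/&I, /*DT=*/nullptr, /*TLI=*/nullptr);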
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index b2326c4714dd..a0ffdb07a7ec 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -535,7 +535,7 @@ public:
DebugLoc End;
public:
- LocRange() {}
+ LocRange() = default;
LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
@@ -900,7 +900,7 @@ template <class BlockT, class LoopT> class LoopInfoBase {
LoopInfoBase(const LoopInfoBase &) = delete;
public:
- LoopInfoBase() {}
+ LoopInfoBase() = default;
~LoopInfoBase() { releaseMemory(); }
LoopInfoBase(LoopInfoBase &&Arg)
@@ -1092,7 +1092,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
LoopInfo(const LoopInfo &) = delete;
public:
- LoopInfo() {}
+ LoopInfo() = default;
explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree);
LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
@@ -1336,6 +1336,10 @@ bool hasMustProgress(const Loop *L);
/// be infinite without side effects without also being undefined)
bool isMustProgress(const Loop *L);
+/// Return true if this loop can be assumed to run for a finite number of
+/// iterations.
+bool isFinite(const Loop *L);
+
/// Return whether an MDNode might represent an access group.
///
/// Access group metadata nodes have to be distinct and empty. Being
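
[Editor's note: a hedged sketch of how a transform might consult the new
query; L is assumed to be in scope.]

// isFinite complements isMustProgress/hasMustProgress: it asks directly
// whether the loop is known to run a bounded number of iterations.
if (isFinite(L)) {
  // It is now sound to reason as if the loop terminates.
}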
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 05411d9c99a2..b1a81d5e7030 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -15,6 +15,7 @@
#include "llvm/IR/PassManager.h"
#include <deque>
+#include <map>
#include <memory>
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index cb522cf731d3..feb22c250979 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -23,6 +23,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h
index df489aaa534d..18a0bfee5730 100644
--- a/llvm/include/llvm/Analysis/MustExecute.h
+++ b/llvm/include/llvm/Analysis/MustExecute.h
@@ -281,9 +281,7 @@ struct MustBeExecutedIterator {
using ExplorerTy = MustBeExecutedContextExplorer;
- MustBeExecutedIterator(const MustBeExecutedIterator &Other)
- : Visited(Other.Visited), Explorer(Other.Explorer),
- CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
+ MustBeExecutedIterator(const MustBeExecutedIterator &Other) = default;
MustBeExecutedIterator(MustBeExecutedIterator &&Other)
: Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
@@ -299,7 +297,7 @@ struct MustBeExecutedIterator {
return *this;
}
- ~MustBeExecutedIterator() {}
+ ~MustBeExecutedIterator() = default;
/// Pre- and post-increment operators.
///{
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 1d330ca58a87..385fa5422926 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -42,7 +42,7 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) {
/// which is the address of the ARC runtime function.
inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) {
auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall);
- if (!B.hasValue() || B->Inputs.size() == 0)
+ if (!B)
return None;
return cast<Function>(B->Inputs[0]);
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 1e6dac44cf2b..b16aa7017719 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1111,9 +1111,11 @@ public:
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
/// iff any changes were made. If the operands are provably equal or
/// unequal, LHS and RHS are set to the same value and Pred is set to either
- /// ICMP_EQ or ICMP_NE.
+ /// ICMP_EQ or ICMP_NE. ControllingFiniteLoop is set if this comparison
+ /// controls the exit of a loop known to have a finite number of iterations.
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS,
- const SCEV *&RHS, unsigned Depth = 0);
+ const SCEV *&RHS, unsigned Depth = 0,
+ bool ControllingFiniteLoop = false);
/// Return the "disposition" of the given SCEV with respect to the given
/// loop.
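
[Editor's note: a hedged caller-side sketch; SE, Pred, LHS, and RHS are
assumed in scope. The new flag is only sound when the comparison is the
controlling exit condition of a loop already known to be finite.]

if (SE.SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
                            /*ControllingFiniteLoop=*/true)) {
  // Pred, LHS, and RHS were canonicalized (possibly to ICMP_EQ/ICMP_NE).
}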
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index 27c58c0afa8a..6eb6d5518a41 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H
#define LLVM_ANALYSIS_SPARSEPROPAGATION_H
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include <set>
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 6e3e1380535e..17d1e3f770c1 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -254,15 +254,10 @@ public:
}
// Provide value semantics.
- TargetLibraryInfo(const TargetLibraryInfo &TLI)
- : Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
+ TargetLibraryInfo(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo(TargetLibraryInfo &&TLI)
: Impl(TLI.Impl), OverrideAsUnavailable(TLI.OverrideAsUnavailable) {}
- TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) {
- Impl = TLI.Impl;
- OverrideAsUnavailable = TLI.OverrideAsUnavailable;
- return *this;
- }
+ TargetLibraryInfo &operator=(const TargetLibraryInfo &TLI) = default;
TargetLibraryInfo &operator=(TargetLibraryInfo &&TLI) {
Impl = TLI.Impl;
OverrideAsUnavailable = TLI.OverrideAsUnavailable;
@@ -445,7 +440,7 @@ public:
///
/// This will use the module's triple to construct the library info for that
/// module.
- TargetLibraryAnalysis() {}
+ TargetLibraryAnalysis() = default;
/// Construct a library analysis with baseline Module-level info.
///
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 34ef9cc61c4f..7412e050322e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1789,7 +1789,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
public:
Model(T Impl) : Impl(std::move(Impl)) {}
- ~Model() override {}
+ ~Model() override = default;
const DataLayout &getDataLayout() const override {
return Impl.getDataLayout();
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4b9ef7c57ffc..a32744f8d58b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -42,8 +42,7 @@ protected:
public:
// Provide value semantics. MSVC requires that we spell all of these out.
- TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
- : DL(Arg.DL) {}
+ TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
const DataLayout &getDataLayout() const { return DL; }
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 8840929174d6..5d3b1270b538 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -372,7 +372,8 @@ enum {
// was never defined for V1.
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
- ELFABIVERSION_AMDGPU_HSA_V4 = 2
+ ELFABIVERSION_AMDGPU_HSA_V4 = 2,
+ ELFABIVERSION_AMDGPU_HSA_V5 = 3
};
#define ELF_RELOC(name, value) name = value,
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
index 6d7aca89ee5b..448c7a4e0034 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -218,7 +218,7 @@ private:
/// A DocNode that is a map.
class MapDocNode : public DocNode {
public:
- MapDocNode() {}
+ MapDocNode() = default;
MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
// Map access methods.
@@ -248,7 +248,7 @@ public:
/// A DocNode that is an array.
class ArrayDocNode : public DocNode {
public:
- ArrayDocNode() {}
+ ArrayDocNode() = default;
ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
// Array access methods.
diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
new file mode 100644
index 000000000000..6160e2551432
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.def
@@ -0,0 +1,26 @@
+//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for running through Swift enumerators.
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_SWIFT_SECTION)
+#error "Missing macro definition of HANDLE_SWIFT_SECTION"
+#endif
+
+HANDLE_SWIFT_SECTION(fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
+HANDLE_SWIFT_SECTION(assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
+HANDLE_SWIFT_SECTION(builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
+HANDLE_SWIFT_SECTION(capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
+HANDLE_SWIFT_SECTION(typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
+HANDLE_SWIFT_SECTION(reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
new file mode 100644
index 000000000000..68c04f11196e
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.h
@@ -0,0 +1,24 @@
+//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef LLVM_BINARYFORMAT_SWIFT_H
+#define LLVM_BINARYFORMAT_SWIFT_H
+
+namespace llvm {
+namespace binaryformat {
+
+enum Swift5ReflectionSectionKind {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
+#include "llvm/BinaryFormat/Swift.def"
+#undef HANDLE_SWIFT_SECTION
+ unknown,
+ last = unknown
+};
+} // end of namespace binaryformat
+} // end of namespace llvm
+
+#endif
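
[Editor's note: a hedged sketch of the intended X-macro pattern; the helper
name is illustrative, not part of this patch.]

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Swift.h"

// Hypothetical helper: expand Swift.def once to map a reflection section
// kind to its Mach-O section name (the ELF and COFF columns are unused here).
inline llvm::StringRef getMachOSectionName(
    llvm::binaryformat::Swift5ReflectionSectionKind Kind) {
  switch (Kind) {
#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
  case llvm::binaryformat::KIND:                                               \
    return MACHO;
#include "llvm/BinaryFormat/Swift.def"
#undef HANDLE_SWIFT_SECTION
  default:
    return "";
  }
}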
diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h
index 7ad2d37a2a35..96f25fce8ddb 100644
--- a/llvm/include/llvm/Bitcode/BitcodeWriter.h
+++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -139,7 +139,7 @@ class raw_ostream;
///
/// ModHash is for use in ThinLTO incremental build, generated while the IR
/// bitcode file writing.
- void WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
+ void writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash);
@@ -148,7 +148,7 @@ class raw_ostream;
/// writing the combined index file for ThinLTO. When writing a subset of the
/// index for a distributed backend, provide the \p ModuleToSummariesForIndex
/// map.
- void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
+ void writeIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr);
@@ -161,7 +161,7 @@ class raw_ostream;
/// If EmbedCmdline is set, the command line is also exported in
/// the corresponding section (__LLVM,_cmdline / .llvmcmd) - even if CmdArgs
/// were empty.
- void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
+ void embedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode,
bool EmbedCmdline,
const std::vector<uint8_t> &CmdArgs);
diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h
index 0393d1a51866..37b7c4d73cff 100644
--- a/llvm/include/llvm/Bitstream/BitstreamReader.h
+++ b/llvm/include/llvm/Bitstream/BitstreamReader.h
@@ -20,8 +20,7 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include <algorithm>
#include <cassert>
#include <climits>
diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h
index 32df448b91a1..7f7372630dbe 100644
--- a/llvm/include/llvm/CodeGen/DIE.h
+++ b/llvm/include/llvm/CodeGen/DIE.h
@@ -886,8 +886,8 @@ class DIEUnit {
DIE Die;
/// The section this unit will be emitted in. This may or may not be set to
/// a valid section depending on the client that is emitting DWARF.
- MCSection *Section;
- uint64_t Offset; /// .debug_info or .debug_types absolute section offset.
+ MCSection *Section = nullptr;
+  uint64_t Offset = 0; ///< .debug_info or .debug_types absolute section offset.
protected:
virtual ~DIEUnit() = default;
diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 9c7e688da6a7..775698a66ada 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -217,12 +217,12 @@ protected:
/// for use in the current block. It resets to EmitStartPt when it makes sense
/// (for example, it's usually profitable to avoid function calls between the
/// definition and the use)
- MachineInstr *LastLocalValue;
+ MachineInstr *LastLocalValue = nullptr;
/// The top most instruction in the current block that is allowed for
/// emitting local variables. LastLocalValue resets to EmitStartPt when it
/// makes sense (for example, on function calls)
- MachineInstr *EmitStartPt;
+ MachineInstr *EmitStartPt = nullptr;
public:
virtual ~FastISel();
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 3a4b3ee18e1b..f9663fadb868 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -95,7 +95,7 @@ public:
bool IsFixed = true)
: ArgInfo(Regs, OrigValue.getType(), OrigIndex, Flags, IsFixed, &OrigValue) {}
- ArgInfo() {}
+ ArgInfo() = default;
};
struct CallLoweringInfo {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index 79d71b2c8982..70945fcecfe5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -30,7 +30,7 @@ class GISelChangeObserver {
SmallPtrSet<MachineInstr *, 4> ChangingAllUsesOfReg;
public:
- virtual ~GISelChangeObserver() {}
+ virtual ~GISelChangeObserver() = default;
/// An instruction is about to be erased.
virtual void erasingInstr(MachineInstr &MI) = 0;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
index f6704df3f49d..3cacdc99dbf8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
@@ -465,7 +465,7 @@ private:
ScalarSizeChangeStrategies[LastOp - FirstOp + 1];
SmallVector<SizeChangeStrategy, 1>
VectorElementSizeChangeStrategies[LastOp - FirstOp + 1];
- bool TablesInitialized;
+ bool TablesInitialized = false;
// Data structures used by getAction:
SmallVector<SizeAndActionsVec, 1> ScalarActions[LastOp - FirstOp + 1];
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 9507c3411b5c..17cb53dd2d5b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -403,9 +403,9 @@ public:
class LegalizeRuleSet {
/// When non-zero, the opcode we are an alias of
- unsigned AliasOf;
+ unsigned AliasOf = 0;
/// If true, there is another opcode that aliases this one
- bool IsAliasedByAnother;
+ bool IsAliasedByAnother = false;
SmallVector<LegalizeRule, 2> Rules;
#ifndef NDEBUG
@@ -432,16 +432,6 @@ class LegalizeRuleSet {
return TypeIdx;
}
- unsigned immIdx(unsigned ImmIdx) {
- assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
- MCOI::OPERAND_FIRST_GENERIC_IMM) &&
- "Imm Index is out of bounds");
-#ifndef NDEBUG
- ImmIdxsCovered.set(ImmIdx);
-#endif
- return ImmIdx;
- }
-
void markAllIdxsAsCovered() {
#ifndef NDEBUG
TypeIdxsCovered.set();
@@ -556,7 +546,7 @@ class LegalizeRuleSet {
}
public:
- LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false) {}
+ LegalizeRuleSet() = default;
bool isAliasedByAnother() { return IsAliasedByAnother; }
void setIsAliasedByAnother() { IsAliasedByAnother = true; }
@@ -568,6 +558,16 @@ public:
}
unsigned getAlias() const { return AliasOf; }
+ unsigned immIdx(unsigned ImmIdx) {
+ assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM -
+ MCOI::OPERAND_FIRST_GENERIC_IMM) &&
+ "Imm Index is out of bounds");
+#ifndef NDEBUG
+ ImmIdxsCovered.set(ImmIdx);
+#endif
+ return ImmIdx;
+ }
+
/// The instruction is legal if predicate is true.
LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
// We have no choice but conservatively assume that the free-form
@@ -824,11 +824,22 @@ public:
LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
return actionForCartesianProduct(LegalizeAction::Custom, Types);
}
+ /// The instruction is custom when type indexes 0 and 1 are both in their
+ /// respective lists.
LegalizeRuleSet &
customForCartesianProduct(std::initializer_list<LLT> Types0,
std::initializer_list<LLT> Types1) {
return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
}
+  /// The instruction is custom when type indexes 0, 1, and 2 are all in
+ /// their respective lists.
+ LegalizeRuleSet &
+ customForCartesianProduct(std::initializer_list<LLT> Types0,
+ std::initializer_list<LLT> Types1,
+ std::initializer_list<LLT> Types2) {
+ return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1,
+ Types2);
+ }
/// Unconditionally custom lower.
LegalizeRuleSet &custom() {
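
[Editor's note: a hedged sketch of the new three-list overload inside a
target's LegalizerInfo constructor; the opcode and LLT sets are illustrative
only.]

// Marks the action Custom for every (type0, type1, type2) combination drawn
// from the three lists.
getActionDefinitionsBuilder(TargetOpcode::G_INTRINSIC)
    .customForCartesianProduct({s32, s64}, {p0}, {s1});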
diff --git a/llvm/include/llvm/CodeGen/IntrinsicLowering.h b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
index 8593f54f3961..06512f2dc560 100644
--- a/llvm/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/llvm/include/llvm/CodeGen/IntrinsicLowering.h
@@ -24,10 +24,10 @@ class DataLayout;
class IntrinsicLowering {
const DataLayout &DL;
- bool Warned;
+ bool Warned = false;
public:
- explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {}
+ explicit IntrinsicLowering(const DataLayout &DL) : DL(DL) {}
/// Replace a call to the specified intrinsic function.
/// If an intrinsic function must be implemented by the code generator
diff --git a/llvm/include/llvm/CodeGen/LoopTraversal.h b/llvm/include/llvm/CodeGen/LoopTraversal.h
index e5810ef1ef26..93d140cabd0d 100644
--- a/llvm/include/llvm/CodeGen/LoopTraversal.h
+++ b/llvm/include/llvm/CodeGen/LoopTraversal.h
@@ -98,7 +98,7 @@ public:
bool Done = true)
: MBB(BB), PrimaryPass(Primary), IsDone(Done) {}
};
- LoopTraversal() {}
+ LoopTraversal() = default;
/// Identifies basic blocks that are part of loops and should be
/// visited twice and returns efficient traversal order for all the blocks.
diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
index 3f145ff224ad..fb276ff117af 100644
--- a/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -30,7 +30,7 @@ public:
typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
ErrorCallbackType;
- MIRFormatter() {}
+ MIRFormatter() = default;
virtual ~MIRFormatter() = default;
/// Implement target specific printing for machine operand immediate value, so
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 05a375bc251b..02eb5d24271d 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -392,7 +392,7 @@ struct FrameIndex {
bool IsFixed;
SMRange SourceRange;
- FrameIndex() {}
+ FrameIndex() = default;
FrameIndex(int FI, const llvm::MachineFrameInfo &MFI);
Expected<int> getFI(const llvm::MachineFrameInfo &MFI) const;
@@ -671,7 +671,7 @@ template <> struct MappingTraits<MachineFrameInfo> {
/// Targets should override this in a way that mirrors the implementation of
/// llvm::MachineFunctionInfo.
struct MachineFunctionInfo {
- virtual ~MachineFunctionInfo() {}
+ virtual ~MachineFunctionInfo() = default;
virtual void mappingImpl(IO &YamlIO) {}
};
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 5df468102a8a..864ca73180af 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -49,14 +49,13 @@ class CalleeSavedInfo {
/// The long-term solution is to model the liveness of callee-saved registers
/// by implicit uses on the return instructions, however, the required
/// changes in the ARM backend would be quite extensive.
- bool Restored;
+ bool Restored = true;
/// Flag indicating whether the register is spilled to stack or another
/// register.
- bool SpilledToReg;
+ bool SpilledToReg = false;
public:
- explicit CalleeSavedInfo(unsigned R, int FI = 0)
- : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {}
+ explicit CalleeSavedInfo(unsigned R, int FI = 0) : Reg(R), FrameIdx(FI) {}
// Accessors.
Register getReg() const { return Reg; }
@@ -180,14 +179,14 @@ private:
/// If true, the object has been sign-extended.
bool isSExt = false;
- uint8_t SSPLayout;
+ uint8_t SSPLayout = SSPLK_None;
StackObject(uint64_t Size, Align Alignment, int64_t SPOffset,
bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca,
bool IsAliased, uint8_t StackID = 0)
: SPOffset(SPOffset), Size(Size), Alignment(Alignment),
isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID),
- Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {}
+ Alloca(Alloca), isAliased(IsAliased) {}
};
/// The alignment of the stack.
diff --git a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
index 0bd0a31abcae..fc7635edd82c 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h
@@ -22,7 +22,7 @@ class Module;
class MachineModuleSlotTracker : public ModuleSlotTracker {
const Function &TheFunction;
const MachineModuleInfo &TheMMI;
- unsigned MDNStartSlot, MDNEndSlot;
+ unsigned MDNStartSlot = 0, MDNEndSlot = 0;
void processMachineFunctionMetadata(AbstractSlotTrackerStorage *AST,
const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
index f17904d54cdd..eded28183ea2 100644
--- a/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -162,7 +162,7 @@ private:
/// ParentMI - This is the instruction that this operand is embedded into.
/// This is valid for all operand types, when the operand is in an instr.
- MachineInstr *ParentMI;
+ MachineInstr *ParentMI = nullptr;
/// Contents union - This contains the payload for the various operand types.
union ContentsUnion {
@@ -200,7 +200,7 @@ private:
} Contents;
explicit MachineOperand(MachineOperandType K)
- : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {
+ : OpKind(K), SubReg_TargetFlags(0) {
// Assert that the layout is what we expect. It's easy to grow this object.
static_assert(alignof(MachineOperand) <= alignof(int64_t),
"MachineOperand shouldn't be more than 8 byte aligned");
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 3e597e728fef..08b76295dbf2 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -124,7 +124,7 @@ public:
unsigned FunctionIdx, unsigned Flags)
: StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {}
- Candidate() {}
+ Candidate() = default;
/// Used to ensure that \p Candidates are outlined in an order that
/// preserves the start and end indices of other \p Candidates.
@@ -218,7 +218,7 @@ public:
C.Benefit = B;
}
- OutlinedFunction() {}
+ OutlinedFunction() = default;
};
} // namespace outliner
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index dbabfe5f0f32..94ae6fe02e9c 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -84,7 +84,7 @@ private:
/// The flag is true upon \p UpdatedCSRs initialization
/// and false otherwise.
- bool IsUpdatedCSRsInitialized;
+ bool IsUpdatedCSRsInitialized = false;
/// Contains the updated callee saved register list.
/// As opposed to the static list defined in register info,
diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
index d0fadd55d481..7c0ebe7191e4 100644
--- a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
+++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
@@ -16,6 +16,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
namespace llvm {
struct ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> {
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 94ba6ad91517..9cea197724cc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -46,8 +46,8 @@ public:
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
std::unique_ptr<SelectionDAGBuilder> SDB;
- AAResults *AA;
- GCFunctionInfo *GFI;
+ AAResults *AA = nullptr;
+ GCFunctionInfo *GFI = nullptr;
CodeGenOpt::Level OptLevel;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
@@ -199,7 +199,7 @@ public:
protected:
/// DAGSize - Size of DAG being instruction selected.
///
- unsigned DAGSize;
+ unsigned DAGSize = 0;
/// ReplaceUses - replace all uses of the old node F with the use
/// of the new node T.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index cd62c47abce9..04c6b50197d4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -741,11 +741,9 @@ public:
using reference = value_type &;
use_iterator() = default;
- use_iterator(const use_iterator &I) : Op(I.Op) {}
+ use_iterator(const use_iterator &I) = default;
- bool operator==(const use_iterator &x) const {
- return Op == x.Op;
- }
+ bool operator==(const use_iterator &x) const { return Op == x.Op; }
bool operator!=(const use_iterator &x) const {
return !operator==(x);
}
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index b2133de93ea2..e8d618a24f9b 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -319,7 +319,7 @@ class raw_ostream;
using IndexList = ilist<IndexListEntry>;
IndexList indexList;
- MachineFunction *mf;
+ MachineFunction *mf = nullptr;
using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>;
Mi2IndexMap mi2iMap;
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index bc22d7789856..47bedd9befc8 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -183,12 +183,12 @@ struct JumpTableHeader {
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
- bool FallthroughUnreachable;
+ bool FallthroughUnreachable = false;
JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
bool E = false)
: First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E), FallthroughUnreachable(false) {}
+ Emitted(E) {}
};
using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
@@ -218,14 +218,14 @@ struct BitTestBlock {
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
- bool FallthroughUnreachable;
+ bool FallthroughUnreachable = false;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {}
+ Cases(std::move(C)), Prob(Pr) {}
};
/// Return the range of values within a range.
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 7713dd0800c0..62365330379d 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -53,9 +53,9 @@ namespace ISD {
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
unsigned IsPointer : 1;
- unsigned ByValOrByRefSize; ///< Byval or byref struct size
+ unsigned ByValOrByRefSize = 0; ///< Byval or byref struct size
- unsigned PointerAddrSpace; ///< Address space of pointer argument
+ unsigned PointerAddrSpace = 0; ///< Address space of pointer argument
public:
ArgFlagsTy()
@@ -65,8 +65,7 @@ namespace ISD {
IsSwiftError(0), IsCFGuardTarget(0), IsHva(0), IsHvaStart(0),
IsSecArgPass(0), MemAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
- IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0),
- PointerAddrSpace(0) {
+ IsCopyElisionCandidate(0), IsPointer(0) {
static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bec191570594..3861648a5feb 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3485,13 +3485,19 @@ public:
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
DAGCombinerInfo &DCI) const;
+ /// Helper wrapper around SimplifyDemandedBits.
+ /// Adds Op back to the worklist upon success.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const;
+
/// More limited version of SimplifyDemandedBits that can be used to "look
/// through" ops that don't contribute to the DemandedBits/DemandedElts -
/// bitwise ops etc.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
SelectionDAG &DAG,
- unsigned Depth) const;
+ unsigned Depth = 0) const;
/// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
/// elements.
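
[Editor's note: a hedged sketch of the new wrapper inside a target DAG
combine; Op, N, NumElts, TLI, and DCI are assumed to come from the combiner.]

// Ask only for the low 16 bits of lane 0; on success the wrapper re-adds Op
// to the combiner worklist.
APInt DemandedBits = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), 16);
APInt DemandedElts = APInt::getOneBitSet(NumElts, 0);
if (TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, DCI))
  return SDValue(N, 0);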
@@ -3676,11 +3682,11 @@ public:
/// Return if the N is a constant or constant vector equal to the true value
/// from getBooleanContents().
- bool isConstTrueVal(const SDNode *N) const;
+ bool isConstTrueVal(SDValue N) const;
/// Return if the N is a constant or constant vector equal to the false value
/// from getBooleanContents().
- bool isConstFalseVal(const SDNode *N) const;
+ bool isConstFalseVal(SDValue N) const;
/// Return if \p N is a True value when extended to \p VT.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h
index 4953d88340b1..42e8d294a637 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -39,10 +39,10 @@ class TargetInstrInfo;
};
private:
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineFunction *MF;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineFunction *MF = nullptr;
/// Virt2PhysMap - This is a virtual to physical register
/// mapping. Each virtual register is required to have an entry in
@@ -72,8 +72,7 @@ class TargetInstrInfo;
static char ID;
VirtRegMap()
- : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
- MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+ : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
VirtRegMap(const VirtRegMap &) = delete;
VirtRegMap &operator=(const VirtRegMap &) = delete;
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 9a5c6bcaf83f..fc8c59904cfb 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,6 +9,7 @@
#ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
#define LLVM_DWARFLINKER_DWARFSTREAMER_H
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/DWARFLinker/DWARFLinker.h"
@@ -48,7 +49,7 @@ public:
: OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
ErrorHandler(Error), WarningHandler(Warning) {}
- bool init(Triple TheTriple);
+ bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
/// Dump the file to the disk.
void finish();
@@ -85,6 +86,11 @@ public:
/// Emit the swift_ast section stored in \p Buffer.
void emitSwiftAST(StringRef Buffer);
+ /// Emit the swift reflection section stored in \p Buffer.
+ void emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size);
+
/// Emit debug_ranges for \p FuncRange by translating the
/// original \p Entries.
void emitRangesEntries(
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 536583e20640..8167aaaeffb5 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -535,7 +535,7 @@ public:
: Kind(K), IsDWARF64(IsDWARF64), Offset(Offset), Length(Length),
CFIs(CodeAlign, DataAlign, Arch) {}
- virtual ~FrameEntry() {}
+ virtual ~FrameEntry() = default;
FrameKind getKind() const { return Kind; }
uint64_t getOffset() const { return Offset; }
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
index 6dd90499c203..d920335d373e 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -20,7 +20,7 @@ namespace gsym {
/// string at offset zero. Strings must be NULL-terminated UTF-8 strings.
struct StringTable {
StringRef Data;
- StringTable() {}
+ StringTable() = default;
StringTable(StringRef D) : Data(D) {}
StringRef operator[](size_t Offset) const { return getString(Offset); }
StringRef getString(uint32_t Offset) const {
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index 779dc885372d..91748e15ba65 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -39,8 +39,8 @@ struct Request {
class DIPrinter {
public:
- DIPrinter() {}
- virtual ~DIPrinter() {}
+ DIPrinter() = default;
+ virtual ~DIPrinter() = default;
virtual void print(const Request &Request, const DILineInfo &Info) = 0;
virtual void print(const Request &Request, const DIInliningInfo &Info) = 0;
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 28545ed06836..760319544a02 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -1,15 +1,15 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
-//
+//===--- ItaniumDemangle.h -----------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Generic itanium demangler library. This file has two byte-per-byte identical
-// copies in the source tree, one in libcxxabi, and the other in llvm.
+// Generic itanium demangler library.
+// There are two copies of this file in the source tree. The one under
+// libcxxabi is the original and the one under llvm is the copy. Use
+// cp-to-llvm.sh to update the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Demangle/README.txt b/llvm/include/llvm/Demangle/README.txt
index 514ff6dd16f2..76470f61f959 100644
--- a/llvm/include/llvm/Demangle/README.txt
+++ b/llvm/include/llvm/Demangle/README.txt
@@ -4,41 +4,50 @@ Itanium Name Demangler Library
Introduction
------------
-This directory contains the generic itanium name demangler library. The main
-purpose of the library is to demangle C++ symbols, i.e. convert the string
-"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform
-some simple analysis on the mangled name, or (in LLVM) use the opaque
-ItaniumPartialDemangler to query the demangled AST.
+This directory contains the generic itanium name demangler
+library. The main purpose of the library is to demangle C++ symbols,
+i.e. convert the string "_Z1fv" into "f()". You can also use the CRTP
+base ManglingParser to perform some simple analysis on the mangled
+name, or (in LLVM) use the opaque ItaniumPartialDemangler to query the
+demangled AST.
Why are there multiple copies of this library in the source tree?
-----------------------------------------------------------------
-This directory is mirrored between libcxxabi/demangle and
-llvm/include/llvm/Demangle. The simple reason for this is that both projects
-need to demangle symbols, but neither can depend on each other. libcxxabi needs
-the demangler to implement __cxa_demangle, which is part of the itanium ABI
-spec. LLVM needs a copy for a bunch of places, but doesn't want to use the
-system's __cxa_demangle because it a) might not be available (i.e., on Windows),
-and b) probably isn't that up-to-date on the latest language features.
-
-The copy of the demangler in LLVM has some extra stuff that aren't needed in
-libcxxabi (ie, the MSVC demangler, ItaniumPartialDemangler), which depend on the
-shared generic components. Despite these differences, we want to keep the "core"
-generic demangling library identical between both copies to simplify development
-and testing.
-
-If you're working on the generic library, then do the work first in libcxxabi,
-then run the cp-to-llvm.sh script in src/demangle. This script takes as an
-argument the path to llvm, and re-copies the changes you made to libcxxabi over.
-Note that this script just blindly overwrites all changes to the generic library
-in llvm, so be careful.
-
-Because the core demangler needs to work in libcxxabi, everything needs to be
-declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't
-introduce any code that depends on the libcxx dylib.
-
-Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have
-both LLVM and libcxxabi depend on a shared demangler library.
+The canonical sources are in libcxxabi/src/demangle and some of the
+files are copied to llvm/include/llvm/Demangle. The simple reason for
+this dates back to before the monorepo: both [sub]projects need to
+demangle symbols, but neither can depend on the other.
+
+* libcxxabi needs the demangler to implement __cxa_demangle, which is
+ part of the itanium ABI spec.
+
+* LLVM needs a copy for a bunch of places, and cannot rely on the
+ system's __cxa_demangle because it a) might not be available (i.e.,
+ on Windows), and b) may not be up-to-date on the latest language
+ features.
+
+The copy of the demangler in LLVM has some extra stuff that isn't
+needed in libcxxabi (i.e., the MSVC demangler and
+ItaniumPartialDemangler), which depends on the shared generic
+components. Despite these differences, we want to keep the "core"
+generic demangling library identical between both copies to simplify
+development and testing.
+
+If you're working on the generic library, then do the work first in
+libcxxabi, then run the cp-to-llvm.sh script in src/demangle. This
+script takes as an optional argument the path to llvm, and copies the
+changes you made to libcxxabi over. Note that this script just
+blindly overwrites all changes to the generic library in llvm, so be
+careful.
+
+Because the core demangler needs to work in libcxxabi, everything
+needs to be declared in an anonymous namespace (see
+DEMANGLE_NAMESPACE_BEGIN), and you can't introduce any code that
+depends on the libcxx dylib.
+
+FIXME: Now that LLVM is a monorepo, it should be possible to
+de-duplicate this code, and have both LLVM and libcxxabi depend on a
+shared demangler library.
Testing
-------
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 323282f69c26..6bbb8837fed1 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -1,7 +1,5 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===--- StringView.h -------------------------------------------*- C++ -*-===//
-//
+//===--- StringView.h ----------------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -9,6 +7,9 @@
//===----------------------------------------------------------------------===//
//
// FIXME: Use std::string_view instead when we support C++17.
+// There are two copies of this file in the source tree. The one under
+// libcxxabi is the original and the one under llvm is the copy. Use
+// cp-to-llvm.sh to update the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index bec019da8680..1cf7e8f1df45 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -1,14 +1,15 @@
-// Do not edit! -*- read-only -*-
-// See README.txt for instructions
-//===--- Utility.h ----------------------------------------------*- C++ -*-===//
-//
+//===--- Utility.h -------------------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Provide some utility classes for use in the demangler(s).
+// Provide some utility classes for use in the demangler.
+// There are two copies of this file in the source tree. The one in libcxxabi
+// is the original and the one in llvm is the copy. Use cp-to-llvm.sh to update
+// the copy. See README.txt for more details.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index ddbb3e76f145..25f1349f15f2 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1636,7 +1636,7 @@ using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
/// or an error if resolution failed.
class JITLinkAsyncLookupContinuation {
public:
- virtual ~JITLinkAsyncLookupContinuation() {}
+ virtual ~JITLinkAsyncLookupContinuation() = default;
virtual void run(Expected<AsyncLookupResult> LR) = 0;
private:
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index d0168f79e3d8..c4647148f287 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -686,7 +686,7 @@ public:
MaterializationUnit(Interface I)
: SymbolFlags(std::move(I.SymbolFlags)),
InitSymbol(std::move(I.InitSymbol)) {}
- virtual ~MaterializationUnit() {}
+ virtual ~MaterializationUnit() = default;
/// Return the name of this materialization unit. Useful for debugging
/// output.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
index d2bf8330695f..253b1c876782 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
@@ -29,7 +29,7 @@ class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin {
public:
class DebugSectionSynthesizer {
public:
- virtual ~DebugSectionSynthesizer() {}
+ virtual ~DebugSectionSynthesizer() = default;
virtual Error startSynthesis() = 0;
virtual Error completeSynthesisAndRegister() = 0;
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index 940d0d28ae83..ac7051b5b75c 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -34,7 +34,7 @@ class ExecutionSession;
class DebugObjectRegistrar {
public:
virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0;
- virtual ~DebugObjectRegistrar() {}
+ virtual ~DebugObjectRegistrar() = default;
};
/// Use ExecutorProcessControl to register debug objects locally or in a remote
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
index c57264e59655..8c287f9fec0e 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
@@ -35,7 +35,7 @@ class Task : public RTTIExtends<Task, RTTIRoot> {
public:
static char ID;
- virtual ~Task() {}
+ virtual ~Task() = default;
/// Description of the task to be performed. Used for logging.
virtual void printDescription(raw_ostream &OS) = 0;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 2178acc90e2c..bee90281e086 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -113,6 +113,9 @@ enum class AddressSpace : unsigned {
Local = 5,
};
+/// \note This needs to be kept in sync with interop.h enum kmp_interop_type_t.
+enum class OMPInteropType { Unknown, Target, TargetSync };
+
} // end namespace omp
} // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 85dd28ec3159..f60debe8411c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1003,6 +1003,55 @@ public:
llvm::ConstantInt *Size,
const llvm::Twine &Name = Twine(""));
+ /// Create a runtime call for __tgt_interop_init
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+ /// \param InteropType type of interop operation
+  /// \param Device device to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_init call
+ CallInst *createOMPInteropInit(const LocationDescription &Loc,
+ Value *InteropVar,
+ omp::OMPInteropType InteropType, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause);
+
+ /// Create a runtime call for __tgt_interop_destroy
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+  /// \param Device device to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_destroy call
+ CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause);
+
+ /// Create a runtime call for __tgt_interop_use
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param InteropVar variable to be allocated
+ /// \param Device devide to which offloading will occur
+ /// \param NumDependences number of dependence variables
+ /// \param DependenceAddress pointer to dependence variables
+ /// \param HaveNowaitClause does nowait clause exist
+ ///
+ /// \returns CallInst to the __tgt_interop_use call
+ CallInst *createOMPInteropUse(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences, Value *DependenceAddress,
+ bool HaveNowaitClause);
+
/// The `omp target` interface
///
/// For more information about the usage of this interface,
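
[Editor's note: a hedged sketch of driving the new interop entry points;
OMPBuilder, Builder (an IRBuilder), Loc, InteropVar, and Device are assumed
to exist in the caller.]

Value *NoDeps = Builder.getInt32(0);
Value *NoDepAddr = Constant::getNullValue(Builder.getInt8PtrTy());
CallInst *Init = OMPBuilder.createOMPInteropInit(
    Loc, InteropVar, omp::OMPInteropType::TargetSync, Device, NoDeps,
    NoDepAddr, /*HaveNowaitClause=*/false);
CallInst *Destroy = OMPBuilder.createOMPInteropDestroy(
    Loc, InteropVar, Device, NoDeps, NoDepAddr, /*HaveNowaitClause=*/false);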
@@ -1167,6 +1216,7 @@ private:
///
/// \param AllocIP Instruction to create AllocaInst before.
/// \param X The target atomic pointer to be updated
+ /// \param XElemTy The element type of the atomic pointer.
/// \param Expr The value to update X with.
/// \param AO Atomic ordering of the generated atomic
/// instructions.
@@ -1183,12 +1233,11 @@ private:
///
/// \returns A pair of the old value of X before the update, and the value
/// used for the update.
- std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X,
- Value *Expr, AtomicOrdering AO,
- AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp,
- bool VolatileX,
- bool IsXBinopExpr);
+ std::pair<Value *, Value *>
+ emitAtomicUpdate(Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+ AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
+ bool IsXBinopExpr);
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
@@ -1200,6 +1249,7 @@ public:
/// a struct to pack relevant information while generating atomic Ops
struct AtomicOpValue {
Value *Var = nullptr;
+ Type *ElemTy = nullptr;
bool IsSigned = false;
bool IsVolatile = false;
};
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index d2b70edd4d87..0c3cb3f43105 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -386,6 +386,13 @@ __OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy,
VoidPtr)
__OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
+__OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int64,
+ Int32, Int32, VoidPtr, Int32)
+__OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
+ Int32, Int32, VoidPtr, Int32)
+__OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
+ Int32, VoidPtr, Int32)
+
__OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
/* Int */ Int32, /* omp_memespace_handle_t */ VoidPtr,
/* Int */ Int32, /* omp_alloctrait_t */ VoidPtr)
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index 31df4c75b6e7..69048554a05c 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -14,11 +14,11 @@
#ifndef LLVM_IR_ABSTRACTCALLSITE_H
#define LLVM_IR_ABSTRACTCALLSITE_H
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include <cassert>
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 5e2cfe6d81ac..74b60f1e3d05 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h
index b872e2626981..0ee584f8af7e 100644
--- a/llvm/include/llvm/IR/CFG.h
+++ b/llvm/include/llvm/IR/CFG.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
@@ -31,7 +32,6 @@
namespace llvm {
-class BasicBlock;
class Instruction;
class Use;
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index f36c9e620d43..fc461fc3f49f 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index ba2568042c41..96569179060f 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -33,7 +33,6 @@
#include <cstddef>
#include <cstdint>
#include <iterator>
-#include <type_traits>
#include <vector>
// Helper macros for defining get() overrides.
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 73b0be43e136..1ea1d9787d61 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -15,14 +15,16 @@
#define LLVM_IR_DIAGNOSTICINFO_H
#include "llvm-c/Types.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TypeSize.h"
-#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cstdint>
#include <functional>
@@ -33,13 +35,15 @@ namespace llvm {
// Forward declarations.
class DiagnosticPrinter;
+class DIFile;
+class DISubprogram;
class CallInst;
class Function;
class Instruction;
class InstructionCost;
-class LLVMContext;
class Module;
-class SMDiagnostic;
+class Type;
+class Value;
/// Defines the different supported severity of a diagnostic.
enum DiagnosticSeverity : char {
@@ -1049,18 +1053,20 @@ static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) {
/// Diagnostic information for SMDiagnostic reporting.
class DiagnosticInfoSrcMgr : public DiagnosticInfo {
const SMDiagnostic &Diagnostic;
+ StringRef ModName;
// For inlineasm !srcloc translation.
bool InlineAsmDiag;
unsigned LocCookie;
public:
- DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic,
+ DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, StringRef ModName,
bool InlineAsmDiag = true, unsigned LocCookie = 0)
: DiagnosticInfo(DK_SrcMgr, getDiagnosticSeverity(Diagnostic.getKind())),
- Diagnostic(Diagnostic), InlineAsmDiag(InlineAsmDiag),
+ Diagnostic(Diagnostic), ModName(ModName), InlineAsmDiag(InlineAsmDiag),
LocCookie(LocCookie) {}
+ StringRef getModuleName() const { return ModName; }
bool isInlineAsmDiag() const { return InlineAsmDiag; }
const SMDiagnostic &getSMDiag() const { return Diagnostic; }
unsigned getLocCookie() const { return LocCookie; }
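
An emission-site sketch showing the new module name argument; `Ctx`,
`SMDiag`, and `M` are hypothetical (not part of this patch):

  // Sketch: thread the owning module's name through to the handler.
  Ctx.diagnose(llvm::DiagnosticInfoSrcMgr(SMDiag, M.getName(),
                                          /*InlineAsmDiag=*/true,
                                          /*LocCookie=*/0));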
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index 475355af5647..d13a5856df3b 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -14,23 +14,34 @@
#ifndef LLVM_IR_DOMINATORS_H
#define LLVM_IR_DOMINATORS_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CFGDiff.h"
+#include "llvm/Support/CFGUpdate.h"
#include "llvm/Support/GenericDomTree.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
#include <utility>
+#include <vector>
namespace llvm {
class Function;
class Instruction;
class Module;
+class Value;
class raw_ostream;
+template <class GraphType> struct GraphTraits;
extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 53f517480ca1..a1789759960d 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -28,12 +28,13 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
@@ -52,7 +52,6 @@
namespace llvm {
class APInt;
-class MDNode;
class Use;
/// This provides the default implementation of the IRBuilder
diff --git a/llvm/include/llvm/IR/IRPrintingPasses.h b/llvm/include/llvm/IR/IRPrintingPasses.h
index 2e62be7cd1ec..3fba5b81e37a 100644
--- a/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -24,6 +24,11 @@
namespace llvm {
class raw_ostream;
class StringRef;
+class Function;
+class FunctionPass;
+class Module;
+class ModulePass;
+class Pass;
/// Create and return a pass that writes the module to the specified
/// \c raw_ostream.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index b3d2a2c8ed9d..589926c0faf1 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1393,10 +1393,13 @@ public:
const Use &getCalledOperandUse() const { return Op<CalledOperandOpEndIdx>(); }
Use &getCalledOperandUse() { return Op<CalledOperandOpEndIdx>(); }
- /// Returns the function called, or null if this is an
- /// indirect function invocation.
+ /// Returns the function called, or null if this is an indirect function
+ /// invocation or the function signature does not match the call signature.
Function *getCalledFunction() const {
- return dyn_cast_or_null<Function>(getCalledOperand());
+ if (auto *F = dyn_cast_or_null<Function>(getCalledOperand()))
+ if (F->getValueType() == getFunctionType())
+ return F;
+ return nullptr;
}
/// Return true if the callsite is an indirect call.
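
One way to read the stricter check, as a sketch with a hypothetical
`llvm::CallBase *CB` (not part of this patch):

  // Sketch: F is non-null only when the callee type matches the call type.
  if (llvm::Function *F = CB->getCalledFunction()) {
    // Direct call whose signature matches CB->getFunctionType().
  } else {
    // Indirect call, or a direct call with a mismatched signature.
  }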
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 9878082ffffa..1937ffd36f7b 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -25,8 +25,6 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
-#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <utility>
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 84ebb461ebef..5929cff3b4fb 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -27,11 +27,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/OperandTraits.h"
@@ -52,7 +50,6 @@ namespace llvm {
class APInt;
class ConstantInt;
class DataLayout;
-class LLVMContext;
//===----------------------------------------------------------------------===//
// AllocaInst Class
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index f4e571e86493..01dada25a285 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1194,6 +1194,17 @@ public:
ConstantInt *getIndex() const;
};
+/// This represents the llvm.instrprof.cover intrinsic.
+class InstrProfCoverInst : public InstrProfInstBase {
+public:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::instrprof_cover;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.instrprof.increment intrinsic.
class InstrProfIncrementInst : public InstrProfInstBase {
public:
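
A dispatch sketch for the new intrinsic class, assuming a hypothetical
`llvm::Instruction &I` (not part of this patch):

  // Sketch: the classof overloads make the usual casting API work.
  if (auto *Cover = llvm::dyn_cast<llvm::InstrProfCoverInst>(&I)) {
    // getName() is assumed available from the InstrProfInstBase base class.
    llvm::GlobalVariable *NameVar = Cover->getName();
  }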
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 3e40bbf39dd4..f5248e82ad21 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -582,6 +582,10 @@ def int_experimental_noalias_scope_decl
def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>;
+// A cover for instrumentation based profiling.
+def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
// A counter increment for instrumentation based profiling.
def int_instrprof_increment : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index e610c28a5923..a65ddff07a29 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -897,6 +897,14 @@ def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llv
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
}
+//===----------------------------------------------------------------------===//
+// Memory Operations (MOPS) Intrinsics
+let TargetPrefix = "aarch64" in {
+ // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
+ def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
+}
+
// Transactional Memory Extension (TME) Intrinsics
let TargetPrefix = "aarch64" in {
def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">,
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index d165a405ce22..446bcecf1c64 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -36,7 +36,6 @@ template <typename T> class StringMapEntry;
class StringRef;
class Twine;
class LLVMRemarkStreamer;
-class raw_ostream;
namespace remarks {
class RemarkStreamer;
diff --git a/llvm/include/llvm/IR/LLVMRemarkStreamer.h b/llvm/include/llvm/IR/LLVMRemarkStreamer.h
index e7627e993370..094ead273eed 100644
--- a/llvm/include/llvm/IR/LLVMRemarkStreamer.h
+++ b/llvm/include/llvm/IR/LLVMRemarkStreamer.h
@@ -14,14 +14,20 @@
#ifndef LLVM_IR_LLVMREMARKSTREAMER_H
#define LLVM_IR_LLVMREMARKSTREAMER_H
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/Remarks/RemarkStreamer.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ToolOutputFile.h"
#include <memory>
#include <string>
namespace llvm {
+
+class DiagnosticInfoOptimizationBase;
+class LLVMContext;
+class ToolOutputFile;
+namespace remarks {
+class RemarkStreamer;
+}
+
/// Streamer for LLVM remarks which has logic for dealing with DiagnosticInfo
/// objects.
class LLVMRemarkStreamer {
diff --git a/llvm/include/llvm/IR/LegacyPassManager.h b/llvm/include/llvm/IR/LegacyPassManager.h
index 2459f0a5450a..b3a4820ba0e4 100644
--- a/llvm/include/llvm/IR/LegacyPassManager.h
+++ b/llvm/include/llvm/IR/LegacyPassManager.h
@@ -16,11 +16,11 @@
#ifndef LLVM_IR_LEGACYPASSMANAGER_H
#define LLVM_IR_LEGACYPASSMANAGER_H
-#include "llvm/Pass.h"
#include "llvm/Support/CBindingWrapping.h"
namespace llvm {
+class Function;
class Pass;
class Module;
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index 51be8667f1c1..42829388b79a 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
@@ -28,6 +27,7 @@ template <typename T> class ArrayRef;
class LLVMContext;
class Constant;
class ConstantAsMetadata;
+class Function;
class MDNode;
class MDString;
class Metadata;
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 26d70b4db2d5..7965884990e5 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -20,9 +20,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
@@ -46,6 +44,8 @@ namespace llvm {
class Module;
class ModuleSlotTracker;
class raw_ostream;
+template <typename T> class StringMapEntry;
+template <typename ValueTy> class StringMapEntryStorage;
class Type;
enum LLVMConstants : uint32_t {
@@ -682,6 +682,10 @@ struct AAMDNodes {
// Shift tbaa.struct Metadata node to start off bytes later
static MDNode *shiftTBAAStruct(MDNode *M, size_t off);
+ // Extend tbaa Metadata node to apply to a series of bytes of length len.
+ // A size of -1 denotes an unknown size.
+ static MDNode *extendToTBAA(MDNode *TBAA, ssize_t len);
+
/// Given two sets of AAMDNodes that apply to the same pointer,
/// give the best AAMDNodes that are compatible with both (i.e. a set of
/// nodes whose allowable aliasing conclusions are a subset of those
@@ -708,6 +712,21 @@ struct AAMDNodes {
return Result;
}
+ /// Create a new AAMDNode that describes this AAMDNode after extending it to
+ /// apply to a series of bytes of length Len. A size of -1 denotes an unknown
+ /// size.
+ AAMDNodes extendTo(ssize_t Len) const {
+ AAMDNodes Result;
+ Result.TBAA = TBAA ? extendToTBAA(TBAA, Len) : nullptr;
+ // tbaa.struct contains (offset, size, type) triples. Extending the length
+ // of the tbaa.struct doesn't require changing this (though more information
+ // could be provided by adding more triples at subsequent lengths).
+ Result.TBAAStruct = TBAAStruct;
+ Result.Scope = Scope;
+ Result.NoAlias = NoAlias;
+ return Result;
+ }
+
/// Given two sets of AAMDNodes applying to potentially different locations,
/// determine the best AAMDNodes that apply to both.
AAMDNodes merge(const AAMDNodes &Other) const;
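
A sketch of the intended use when widening a memory access; `AATags`,
`NewLen`, and `NewStore` are hypothetical (not part of this patch):

  // Sketch: stretch the TBAA info to cover NewLen bytes (-1 if unknown).
  llvm::AAMDNodes Wide = AATags.extendTo(NewLen);
  NewStore->setAAMetadata(Wide);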
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index ec1d5ef79eed..b76bc879fb45 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h
index 8e81f30b2289..27dd075bbdb2 100644
--- a/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/llvm/include/llvm/IR/PassInstrumentation.h
@@ -86,7 +86,7 @@ public:
using AnalysesClearedFunc = void(StringRef);
public:
- PassInstrumentationCallbacks() {}
+ PassInstrumentationCallbacks() = default;
/// Copying PassInstrumentationCallbacks is not intended.
PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete;
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index e88d2233daba..12f9052a9edd 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -46,11 +46,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManagerInternal.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/TypeName.h"
-#include <algorithm>
#include <cassert>
#include <cstring>
#include <iterator>
@@ -473,7 +470,7 @@ class PassManager : public PassInfoMixin<
PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...>> {
public:
/// Construct a pass manager.
- explicit PassManager() {}
+ explicit PassManager() = default;
// FIXME: These are equivalent to the default move constructor/move
// assignment. However, using = default triggers linker errors due to the
diff --git a/llvm/include/llvm/IR/PassManagerImpl.h b/llvm/include/llvm/IR/PassManagerImpl.h
index bb4fbe98b082..3c94cf2811f6 100644
--- a/llvm/include/llvm/IR/PassManagerImpl.h
+++ b/llvm/include/llvm/IR/PassManagerImpl.h
@@ -20,7 +20,7 @@
namespace llvm {
template <typename IRUnitT, typename... ExtraArgTs>
-inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() {}
+inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager() = default;
template <typename IRUnitT, typename... ExtraArgTs>
inline AnalysisManager<IRUnitT, ExtraArgTs...>::AnalysisManager(
diff --git a/llvm/include/llvm/IR/PassTimingInfo.h b/llvm/include/llvm/IR/PassTimingInfo.h
index e44321b4af66..49a83605c47a 100644
--- a/llvm/include/llvm/IR/PassTimingInfo.h
+++ b/llvm/include/llvm/IR/PassTimingInfo.h
@@ -15,8 +15,6 @@
#ifndef LLVM_IR_PASSTIMINGINFO_H
#define LLVM_IR_PASSTIMINGINFO_H
-#include "llvm/ADT/Any.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h
index 5ad1d0a6f920..1d6b10d9a78b 100644
--- a/llvm/include/llvm/IR/ReplaceConstant.h
+++ b/llvm/include/llvm/IR/ReplaceConstant.h
@@ -14,13 +14,16 @@
#ifndef LLVM_IR_REPLACECONSTANT_H
#define LLVM_IR_REPLACECONSTANT_H
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instruction.h"
#include <map>
#include <vector>
namespace llvm {
+class ConstantExpr;
+class Instruction;
+class Use;
+template <typename PtrType> class SmallPtrSetImpl;
+
/// The given instruction \p I contains given constant expression \p CE as one
/// of its operands, possibly nested within constant expression trees. Convert
/// all reachable paths from constant expression operands of \p I to \p CE into
diff --git a/llvm/include/llvm/IR/SSAContext.h b/llvm/include/llvm/IR/SSAContext.h
index 8879512610c2..8ca23e3ee077 100644
--- a/llvm/include/llvm/IR/SSAContext.h
+++ b/llvm/include/llvm/IR/SSAContext.h
@@ -15,18 +15,15 @@
#ifndef LLVM_IR_SSACONTEXT_H
#define LLVM_IR_SSACONTEXT_H
-#include "llvm/ADT/GenericSSAContext.h"
-#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/Support/Printable.h"
-#include <memory>
-
namespace llvm {
class BasicBlock;
class Function;
class Instruction;
class Value;
template <typename, bool> class DominatorTreeBase;
+template <typename _FunctionT> class GenericSSAContext;
template <> class GenericSSAContext<Function> {
Function *F;
diff --git a/llvm/include/llvm/IR/SafepointIRVerifier.h b/llvm/include/llvm/IR/SafepointIRVerifier.h
index 76b147e690be..246d236adb38 100644
--- a/llvm/include/llvm/IR/SafepointIRVerifier.h
+++ b/llvm/include/llvm/IR/SafepointIRVerifier.h
@@ -37,7 +37,7 @@ FunctionPass *createSafepointIRVerifierPass();
class SafepointIRVerifierPass : public PassInfoMixin<SafepointIRVerifierPass> {
public:
- explicit SafepointIRVerifierPass() {}
+ explicit SafepointIRVerifierPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index a254a67e6b1f..da9c732ad818 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -19,10 +19,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -204,11 +203,6 @@ public:
/// For example this could happen due to relocations on unwinding
/// path of invoke.
inline std::vector<const GCRelocateInst *> getGCRelocates() const;
-
- /// Returns pair of boolean flags. The first one is true is there is
- /// a gc.result intrinsic in the same block as statepoint. The second flag
- /// is true if there is an intrinsic outside of the block with statepoint.
- inline std::pair<bool, bool> getGCResultLocality() const;
};
std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const {
@@ -236,18 +230,6 @@ std::vector<const GCRelocateInst *> GCStatepointInst::getGCRelocates() const {
return Result;
}
-std::pair<bool, bool> GCStatepointInst::getGCResultLocality() const {
- std::pair<bool, bool> Res(false, false);
- for (auto *U : users())
- if (auto *GRI = dyn_cast<GCResultInst>(U)) {
- if (GRI->getParent() == this->getParent())
- Res.first = true;
- else
- Res.second = true;
- }
- return Res;
-}
-
/// Call sites that get wrapped by a gc.statepoint (currently only in
/// RewriteStatepointsForGC and potentially in other passes in the future) can
/// have attributes that describe properties of gc.statepoint call they will be
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 98c97375ad7b..e4e8a5529c87 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -15,7 +15,6 @@
#define LLVM_IR_TYPE_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
@@ -33,6 +32,7 @@ class LLVMContext;
class PointerType;
class raw_ostream;
class StringRef;
+template <typename PtrType> class SmallPtrSetImpl;
/// The instances of the Type class are immutable: once they are created,
/// they are never changed. Also note that only one instance of a particular
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index 917db2679c55..64b86f3a4396 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -25,7 +25,6 @@
#define LLVM_IR_USE_H
#include "llvm-c/Types.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Compiler.h"
diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h
index 5b16b8304692..8c3cd171b1a2 100644
--- a/llvm/include/llvm/InterfaceStub/IFSStub.h
+++ b/llvm/include/llvm/InterfaceStub/IFSStub.h
@@ -95,7 +95,7 @@ struct IFSStub {
std::vector<std::string> NeededLibs;
std::vector<IFSSymbol> Symbols;
- IFSStub() {}
+ IFSStub() = default;
IFSStub(const IFSStub &Stub);
IFSStub(IFSStub &&Stub);
};
@@ -106,7 +106,7 @@ struct IFSStub {
// This class makes it possible to map a second traits so the same data
// structure can be used for 2 different yaml schema.
struct IFSStubTriple : IFSStub {
- IFSStubTriple() {}
+ IFSStubTriple() = default;
IFSStubTriple(const IFSStub &Stub);
IFSStubTriple(const IFSStubTriple &Stub);
IFSStubTriple(IFSStubTriple &&Stub);
diff --git a/llvm/include/llvm/LineEditor/LineEditor.h b/llvm/include/llvm/LineEditor/LineEditor.h
index 0beaf1bb23a9..9f4ea5bee139 100644
--- a/llvm/include/llvm/LineEditor/LineEditor.h
+++ b/llvm/include/llvm/LineEditor/LineEditor.h
@@ -64,7 +64,7 @@ public:
/// A possible completion at a given cursor position.
struct Completion {
- Completion() {}
+ Completion() = default;
Completion(const std::string &TypedText, const std::string &DisplayText)
: TypedText(TypedText), DisplayText(DisplayText) {}
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 88d86d5b675a..d2307d692278 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,6 +80,10 @@ namespace llvm {
private:
Environment Env;
+ /// The name of the Segment where Swift5 Reflection Section data will be
+ /// emitted.
+ StringRef Swift5ReflectionSegmentName;
+
/// The triple for this object.
Triple TT;
@@ -399,13 +403,17 @@ namespace llvm {
const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
const SourceMgr *Mgr = nullptr,
MCTargetOptions const *TargetOpts = nullptr,
- bool DoAutoReset = true);
+ bool DoAutoReset = true,
+ StringRef Swift5ReflSegmentName = {});
MCContext(const MCContext &) = delete;
MCContext &operator=(const MCContext &) = delete;
~MCContext();
Environment getObjectFileType() const { return Env; }
+ const StringRef &getSwift5ReflectionSegmentName() const {
+ return Swift5ReflectionSegmentName;
+ }
const Triple &getTargetTriple() const { return TT; }
const SourceMgr *getSourceManager() const { return SrcMgr; }
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5e0cccaba77f..3c1d10c4e62f 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/VersionTuple.h"
@@ -228,6 +229,10 @@ protected:
MCSection *ReadOnly8Section = nullptr;
MCSection *ReadOnly16Section = nullptr;
+ // Swift5 Reflection Data Sections
+ std::array<MCSection *, binaryformat::Swift5ReflectionSectionKind::last>
+ Swift5ReflectionSections = {};
+
public:
void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel = false);
@@ -423,6 +428,15 @@ public:
bool isPositionIndependent() const { return PositionIndependent; }
+ // Swift5 Reflection Data Sections
+ MCSection *getSwift5ReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind) {
+ return ReflSectionKind !=
+ llvm::binaryformat::Swift5ReflectionSectionKind::unknown
+ ? Swift5ReflectionSections[ReflSectionKind]
+ : nullptr;
+ }
+
private:
bool PositionIndependent = false;
MCContext *Ctx = nullptr;
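
A lookup sketch for the new accessor, with a hypothetical
`llvm::MCObjectFileInfo *MOFI`; `fieldmd` stands in for one of the kinds
assumed to be defined in BinaryFormat/Swift.def (not part of this patch):

  // Sketch: the accessor returns nullptr for the 'unknown' kind.
  using llvm::binaryformat::Swift5ReflectionSectionKind;
  llvm::MCSection *Sec =
      MOFI->getSwift5ReflectionSection(Swift5ReflectionSectionKind::fieldmd);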
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 17b7446baae8..9ff68f4236ca 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -268,7 +268,7 @@ public:
// Used for decoding
uint32_t ChildrenToProcess = 0;
- MCDecodedPseudoProbeInlineTree(){};
+ MCDecodedPseudoProbeInlineTree() = default;
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
// Return false if it's a dummy inline site
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index 5b993c6a5345..c4be5312ea19 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -41,7 +41,7 @@ public:
InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
: STI(STI), MCII(MCII) {}
- virtual ~InstrPostProcess() {}
+ virtual ~InstrPostProcess() = default;
/// This method can be overriden by targets to modify the mca::Instruction
/// object after it has been lowered from the MCInst.
diff --git a/llvm/include/llvm/MCA/HWEventListener.h b/llvm/include/llvm/MCA/HWEventListener.h
index 5b5b83cccd9c..8298e0705d33 100644
--- a/llvm/include/llvm/MCA/HWEventListener.h
+++ b/llvm/include/llvm/MCA/HWEventListener.h
@@ -176,7 +176,7 @@ public:
virtual void onReleasedBuffers(const InstRef &Inst,
ArrayRef<unsigned> Buffers) {}
- virtual ~HWEventListener() {}
+ virtual ~HWEventListener() = default;
private:
virtual void anchor();
diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 7467fd6754f0..1c909b01a390 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -49,7 +49,7 @@ class ResourceStrategy {
ResourceStrategy &operator=(const ResourceStrategy &) = delete;
public:
- ResourceStrategy() {}
+ ResourceStrategy() = default;
virtual ~ResourceStrategy();
/// Selects a processor resource unit from a ReadyMask.
diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index 5a5fc90f18bd..b792cbc3d9ac 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -45,7 +45,7 @@ protected:
public:
friend class Archive;
virtual std::unique_ptr<AbstractArchiveMemberHeader> clone() const = 0;
- virtual ~AbstractArchiveMemberHeader(){};
+ virtual ~AbstractArchiveMemberHeader() = default;
/// Get the name without looking up long names.
virtual Expected<StringRef> getRawName() const = 0;
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index e59a63d93989..c674b80c814d 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -699,7 +699,7 @@ private:
}
}
- Elf_Note_Iterator_Impl() {}
+ Elf_Note_Iterator_Impl() = default;
explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {}
Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err)
: RemainingSize(Size), Err(&Err) {
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index ede742c47f97..49a0706b84be 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
@@ -583,6 +584,9 @@ public:
StringRef mapDebugSectionName(StringRef Name) const override;
+ llvm::binaryformat::Swift5ReflectionSectionKind
+ mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
+
bool hasPageZeroSegment() const { return HasPageZeroSegment; }
static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 12704b1fc88e..950c38a599d5 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/SymbolicFile.h"
@@ -290,6 +291,11 @@ protected:
virtual void getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const = 0;
+ virtual llvm::binaryformat::Swift5ReflectionSectionKind
+ mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
+ return llvm::binaryformat::Swift5ReflectionSectionKind::unknown;
+ }
+
Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
public:
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 9eb754a4d824..561cd54fa998 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -75,7 +75,7 @@ private:
class OptBisectInstrumentation {
public:
- OptBisectInstrumentation() {}
+ OptBisectInstrumentation() = default;
void registerCallbacks(PassInstrumentationCallbacks &PIC);
};
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 4d3bb0e8ff10..a416eb28906e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -16,6 +16,7 @@
#define LLVM_PROFILEDATA_INSTRPROF_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
@@ -277,6 +278,18 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const Function &F, const Module &M);
+/// An enum describing the attributes of an instrumented profile.
+enum class InstrProfKind {
+ Unknown = 0x0,
+ FE = 0x1, // A Clang frontend profile, incompatible with other attrs.
+ IR = 0x2, // An IR-level profile (default when -fprofile-generate is used).
+ BB = 0x4, // A profile with entry basic block instrumentation.
+ CS = 0x8, // A context sensitive IR-level profile.
+ SingleByteCoverage = 0x10, // Use single byte probes for coverage.
+ FunctionEntryOnly = 0x20, // Only instrument the function entry basic block.
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly)
+};
+
const std::error_category &instrprof_category();
enum class instrprof_error {
@@ -1155,12 +1168,6 @@ struct Header {
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
int64_t &RangeLast);
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled,
- bool DebugInfoCorrelate);
-
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
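
Because InstrProfKind is declared with LLVM_MARK_AS_BITMASK_ENUM, kinds
compose with the usual bitwise operators; a small sketch (not part of this
patch):

  // Sketch: combine and test attribute bits.
  llvm::InstrProfKind K = llvm::InstrProfKind::IR | llvm::InstrProfKind::CS;
  bool IsCS = static_cast<bool>(K & llvm::InstrProfKind::CS); // true
  K |= llvm::InstrProfKind::SingleByteCoverage;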
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index 135936b99f24..3d0076fd9035 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -55,7 +55,7 @@ public:
enum InstrProfCorrelatorKind { CK_32Bit, CK_64Bit };
InstrProfCorrelatorKind getKind() const { return Kind; }
- virtual ~InstrProfCorrelator() {}
+ virtual ~InstrProfCorrelator() = default;
protected:
struct Context {
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 0544b6b2ef71..62054a6a3df5 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -660,6 +660,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* generated profile, and 0 if this is a Clang FE generated profile.
* 1 in bit 57 indicates there are context-sensitive records in the profile.
* The 59th bit indicates whether to use debug info to correlate profiles.
+ * The 60th bit indicates single byte coverage instrumentation.
+ * The 61st bit indicates function entry instrumentation only.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@@ -667,6 +669,8 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
+#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
+#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 1326cbf0e1ce..e9dd19a69792 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -100,6 +100,16 @@ public:
/// Return true if we must provide debug info to create PGO profiles.
virtual bool useDebugInfoCorrelate() const { return false; }
+ /// Return true if the profile has single byte counters representing coverage.
+ virtual bool hasSingleByteCoverage() const = 0;
+
+ /// Return true if the profile only instruments function entries.
+ virtual bool functionEntryOnly() const = 0;
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the
+ /// profile. To check individual attributes, prefer using the helpers above.
+ virtual InstrProfKind getProfileKind() const = 0;
+
/// Return the PGO symtab. There are three different readers:
/// Raw, Text, and Indexed profile readers. The first two types
/// of readers are used only by llvm-profdata tool, while the indexed
@@ -176,9 +186,8 @@ private:
std::unique_ptr<MemoryBuffer> DataBuffer;
/// Iterator over the profile data.
line_iterator Line;
- bool IsIRLevelProfile = false;
- bool HasCSIRLevelProfile = false;
- bool InstrEntryBBEnabled = false;
+ /// The attributes of the current profile.
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
Error readValueProfileData(InstrProfRecord &Record);
@@ -191,11 +200,27 @@ public:
/// Return true if the given buffer is in text instrprof format.
static bool hasFormat(const MemoryBuffer &Buffer);
- bool isIRLevelProfile() const override { return IsIRLevelProfile; }
+ bool isIRLevelProfile() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::IR);
+ }
- bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
+ bool hasCSIRLevelProfile() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::CS);
+ }
- bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
+ bool instrEntryBBEnabled() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::BB);
+ }
+
+ bool hasSingleByteCoverage() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
+ }
+
+ bool functionEntryOnly() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
+ }
+
+ InstrProfKind getProfileKind() const override { return ProfileKind; }
/// Read the header.
Error readHeader() override;
@@ -276,6 +301,17 @@ public:
return (Version & VARIANT_MASK_DBG_CORRELATE) != 0;
}
+ bool hasSingleByteCoverage() const override {
+ return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
+ }
+
+ bool functionEntryOnly() const override {
+ return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
+ }
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the raw
+ /// instr profile.
+ InstrProfKind getProfileKind() const override;
+
InstrProfSymtab &getSymtab() override {
assert(Symtab.get());
return *Symtab.get();
@@ -333,7 +369,9 @@ private:
return Symtab->getFuncName(swap(NameRef));
}
- int getCounterTypeSize() const { return sizeof(uint64_t); }
+ int getCounterTypeSize() const {
+ return hasSingleByteCoverage() ? sizeof(uint8_t) : sizeof(uint64_t);
+ }
};
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
@@ -413,6 +451,9 @@ struct InstrProfReaderIndexBase {
virtual bool isIRLevelProfile() const = 0;
virtual bool hasCSIRLevelProfile() const = 0;
virtual bool instrEntryBBEnabled() const = 0;
+ virtual bool hasSingleByteCoverage() const = 0;
+ virtual bool functionEntryOnly() const = 0;
+ virtual InstrProfKind getProfileKind() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -465,6 +506,16 @@ public:
return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
}
+ bool hasSingleByteCoverage() const override {
+ return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
+ }
+
+ bool functionEntryOnly() const override {
+ return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
+ }
+
+ InstrProfKind getProfileKind() const override;
+
Error populateSymtab(InstrProfSymtab &Symtab) override {
return Symtab.create(HashTable->keys());
}
@@ -473,7 +524,7 @@ public:
/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
class InstrProfReaderRemapper {
public:
- virtual ~InstrProfReaderRemapper() {}
+ virtual ~InstrProfReaderRemapper() = default;
virtual Error populateRemappings() { return Error::success(); }
virtual Error getRecords(StringRef FuncName,
ArrayRef<NamedInstrProfRecord> &Data) = 0;
@@ -523,6 +574,18 @@ public:
return Index->instrEntryBBEnabled();
}
+ bool hasSingleByteCoverage() const override {
+ return Index->hasSingleByteCoverage();
+ }
+
+ bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
+
+ /// Returns an InstrProfKind bitmask describing the attributes of the
+ /// indexed instr profile.
+ InstrProfKind getProfileKind() const override {
+ return Index->getProfileKind();
+ }
+
/// Return true if the given buffer is in an indexed instrprof format.
static bool hasFormat(const MemoryBuffer &DataBuffer);
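
A reader-side sketch of the new queries, with a hypothetical
`llvm::InstrProfReader &Reader` (not part of this patch):

  // Sketch: the helpers mirror individual InstrProfKind bits.
  if (Reader.hasSingleByteCoverage()) {
    // Counters are one-byte coverage probes rather than 64-bit counts.
  }
  llvm::InstrProfKind Kind = Reader.getProfileKind();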
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 97c80de6aa23..af1e46cf4fc2 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -33,19 +33,17 @@ class raw_fd_ostream;
class InstrProfWriter {
public:
using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>;
- // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation.
- enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS };
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
- ProfKind ProfileKind = PF_Unknown;
- bool InstrEntryBBEnabled;
+ // An enum describing the attributes of the profile.
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
// Use raw pointer here for the incomplete type object.
InstrProfRecordWriterTrait *InfoObj;
public:
- InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false);
+ InstrProfWriter(bool Sparse = false);
~InstrProfWriter();
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
@@ -79,30 +77,41 @@ public:
/// Write the profile, returning the raw data. For testing.
std::unique_ptr<MemoryBuffer> writeBuffer();
- /// Set the ProfileKind. Report error if mixing FE and IR level profiles.
- /// \c WithCS indicates if this is for contenxt sensitive instrumentation.
- Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) {
- if (ProfileKind == PF_Unknown) {
- if (IsIRLevel)
- ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel;
- else
- ProfileKind = PF_FE;
+ /// Update the attributes of the current profile from the attributes
+ /// specified. An error is returned if IR and FE profiles are mixed.
+ Error mergeProfileKind(const InstrProfKind Other) {
+ // If the kind is unset, this is the first profile we are merging, so just
+ // set it to the given type.
+ if (ProfileKind == InstrProfKind::Unknown) {
+ ProfileKind = Other;
return Error::success();
}
- if (((ProfileKind != PF_FE) && !IsIRLevel) ||
- ((ProfileKind == PF_FE) && IsIRLevel))
+ // Returns true if merging should fail, assuming A and B are incompatible.
+ auto testIncompatible = [&](InstrProfKind A, InstrProfKind B) {
+ return (static_cast<bool>(ProfileKind & A) &&
+ static_cast<bool>(Other & B)) ||
+ (static_cast<bool>(ProfileKind & B) &&
+ static_cast<bool>(Other & A));
+ };
+
+ // Check if the profiles are incompatible. Clang frontend profiles can't be
+ // merged with other profile types.
+ if (static_cast<bool>((ProfileKind & InstrProfKind::FE) ^
+ (Other & InstrProfKind::FE))) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
+ }
+ if (testIncompatible(InstrProfKind::FunctionEntryOnly, InstrProfKind::BB)) {
+ return make_error<InstrProfError>(
+ instrprof_error::unsupported_version,
+ "cannot merge FunctionEntryOnly profiles and BB profiles together");
+ }
- // When merging a context-sensitive profile (WithCS == true) with an IRLevel
- // profile, set the kind to PF_IRLevelWithCS.
- if (ProfileKind == PF_IRLevel && WithCS)
- ProfileKind = PF_IRLevelWithCS;
-
+ // Now we update the profile type with the bits that are set.
+ ProfileKind |= Other;
return Error::success();
}
- void setInstrEntryBBEnabled(bool Enabled) { InstrEntryBBEnabled = Enabled; }
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);
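
A merge-loop sketch replacing the old setIsIRLevelProfile call; `Reader` is
hypothetical (not part of this patch):

  // Sketch: fold each input's kind into the writer; FE/IR mixes fail.
  llvm::InstrProfWriter Writer;
  if (llvm::Error E = Writer.mergeProfileKind(Reader.getProfileKind()))
    return E; // e.g. FunctionEntryOnly merged with BB is rejected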
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index f2cb3738f053..ff22a697965c 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -20,11 +20,10 @@
*
\*===----------------------------------------------------------------------===*/
-
#ifdef _MSC_VER
-#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop))
+#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
#else
-#define PACKED(__decl__) __decl__ __attribute__((__packed__))
+#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
#endif
// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
@@ -47,14 +46,106 @@ PACKED(struct Header {
uint64_t StackOffset;
});
+
// A struct describing the information necessary to describe a /proc/maps
// segment entry for a particular binary/library identified by its build id.
PACKED(struct SegmentEntry {
uint64_t Start;
uint64_t End;
uint64_t Offset;
- uint8_t BuildId[32];
+ // This field is unused until sanitizer procmaps support for build IDs on
+ // Linux ELF is implemented.
+ uint8_t BuildId[32] = {0};
+
+ SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
+ Start(S), End(E), Offset(O) {}
+
+ SegmentEntry(const SegmentEntry& S) {
+ Start = S.Start;
+ End = S.End;
+ Offset = S.Offset;
+ }
+
+ SegmentEntry& operator=(const SegmentEntry& S) {
+ Start = S.Start;
+ End = S.End;
+ Offset = S.Offset;
+ return *this;
+ }
+
+ bool operator==(const SegmentEntry& S) const {
+ return Start == S.Start &&
+ End == S.End &&
+ Offset == S.Offset;
+ }
});
+
+// A struct representing the heap allocation characteristics of a particular
+// runtime context. This struct is shared between the compiler-rt runtime and
+// the raw profile reader. The indexed format uses a separate, self-describing
+// backwards compatible format.
+PACKED(struct MemInfoBlock {
+ uint32_t alloc_count;
+ uint64_t total_access_count, min_access_count, max_access_count;
+ uint64_t total_size;
+ uint32_t min_size, max_size;
+ uint32_t alloc_timestamp, dealloc_timestamp;
+ uint64_t total_lifetime;
+ uint32_t min_lifetime, max_lifetime;
+ uint32_t alloc_cpu_id, dealloc_cpu_id;
+ uint32_t num_migrated_cpu;
+
+ // Currently only compared against the previously deallocated object.
+ uint32_t num_lifetime_overlaps;
+ uint32_t num_same_alloc_cpu;
+ uint32_t num_same_dealloc_cpu;
+
+ uint64_t data_type_id; // TODO: hash of type name
+
+ MemInfoBlock() : alloc_count(0) {}
+
+ MemInfoBlock(uint32_t size, uint64_t access_count, uint32_t alloc_timestamp,
+ uint32_t dealloc_timestamp, uint32_t alloc_cpu, uint32_t dealloc_cpu)
+ : alloc_count(1), total_access_count(access_count),
+ min_access_count(access_count), max_access_count(access_count),
+ total_size(size), min_size(size), max_size(size),
+ alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
+ total_lifetime(dealloc_timestamp - alloc_timestamp),
+ min_lifetime(total_lifetime), max_lifetime(total_lifetime),
+ alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
+ num_lifetime_overlaps(0), num_same_alloc_cpu(0),
+ num_same_dealloc_cpu(0) {
+ num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
+ }
+
+ void Merge(const MemInfoBlock &newMIB) {
+ alloc_count += newMIB.alloc_count;
+
+ total_access_count += newMIB.total_access_count;
+ min_access_count = newMIB.min_access_count < min_access_count ? newMIB.min_access_count : min_access_count;
+ max_access_count = newMIB.max_access_count > max_access_count ? newMIB.max_access_count : max_access_count;
+
+ total_size += newMIB.total_size;
+ min_size = newMIB.min_size < min_size ? newMIB.min_size : min_size;
+ max_size = newMIB.max_size > max_size ? newMIB.max_size : max_size;
+
+ total_lifetime += newMIB.total_lifetime;
+ min_lifetime = newMIB.min_lifetime < min_lifetime ? newMIB.min_lifetime : min_lifetime;
+ max_lifetime = newMIB.max_lifetime > max_lifetime ? newMIB.max_lifetime : max_lifetime;
+
+ // We know newMIB was deallocated later, so we just need to check whether
+ // it was allocated before the last one was deallocated.
+ num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
+ alloc_timestamp = newMIB.alloc_timestamp;
+ dealloc_timestamp = newMIB.dealloc_timestamp;
+
+ num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
+ num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
+ alloc_cpu_id = newMIB.alloc_cpu_id;
+ dealloc_cpu_id = newMIB.dealloc_cpu_id;
+ }
+});
+
} // namespace memprof
} // namespace llvm
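
A small sketch of accumulating two blocks for the same allocation context
(values are illustrative; not part of this patch):

  // Sketch: fold a later allocation's stats into an accumulated block.
  llvm::memprof::MemInfoBlock Acc(/*size=*/32, /*access_count=*/4,
                                  /*alloc_timestamp=*/100,
                                  /*dealloc_timestamp=*/200,
                                  /*alloc_cpu=*/0, /*dealloc_cpu=*/0);
  llvm::memprof::MemInfoBlock Next(16, 2, 250, 300, 1, 1);
  Acc.Merge(Next); // alloc_count == 2; min/max stats updated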
diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
index 645a8b3c0b17..19080c0132e3 100644
--- a/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h
@@ -17,11 +17,12 @@
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Remarks/RemarkSerializer.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace remarks {
+struct Remarks;
+
/// Serialize the remarks to LLVM bitstream.
/// This class provides ways to emit remarks in the LLVM bitstream format and
/// its associated metadata.
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 49fd880be8ba..79d74e39deee 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -13,7 +13,6 @@
#ifndef LLVM_REMARKS_REMARKLINKER_H
#define LLVM_REMARKS_REMARKLINKER_H
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
@@ -22,6 +21,11 @@
#include <set>
namespace llvm {
+
+namespace object {
+class ObjectFile;
+}
+
namespace remarks {
struct RemarkLinker {
diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h
index b838f75e530f..61dfdbf3c17c 100644
--- a/llvm/include/llvm/Remarks/RemarkParser.h
+++ b/llvm/include/llvm/Remarks/RemarkParser.h
@@ -13,9 +13,7 @@
#ifndef LLVM_REMARKS_REMARKPARSER_H
#define LLVM_REMARKS_REMARKPARSER_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/Error.h"
#include <memory>
@@ -23,11 +21,13 @@
namespace llvm {
namespace remarks {
+struct Remark;
+
class EndOfFileError : public ErrorInfo<EndOfFileError> {
public:
static char ID;
- EndOfFileError() {}
+ EndOfFileError() = default;
void log(raw_ostream &OS) const override { OS << "End of file reached."; }
std::error_code convertToErrorCode() const override {
diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h
index 90e556df87e7..6217bd98d1a5 100644
--- a/llvm/include/llvm/Remarks/RemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/RemarkSerializer.h
@@ -16,11 +16,15 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStringTable.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
+class raw_ostream;
+
namespace remarks {
+struct Remark;
+
enum class SerializerMode {
Separate, // A mode where the metadata is serialized separately from the
// remarks. Typically, this is used when the remarks need to be
diff --git a/llvm/include/llvm/Remarks/RemarkStreamer.h b/llvm/include/llvm/Remarks/RemarkStreamer.h
index 7741cb45b72c..b25cb0c331a4 100644
--- a/llvm/include/llvm/Remarks/RemarkStreamer.h
+++ b/llvm/include/llvm/Remarks/RemarkStreamer.h
@@ -34,10 +34,12 @@
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include <memory>
namespace llvm {
+
+class raw_ostream;
+
namespace remarks {
class RemarkStreamer final {
/// The regex used to filter remarks based on the passes that emit them.
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index 26f4bae53119..a953e9439db4 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -204,6 +204,9 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS | AArch64::AEK_PAUTH))
AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index 784a980fee24..e0838a1f425e 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -44,6 +44,11 @@ constexpr uint32_t VersionMajorV4 = 1;
/// HSA metadata minor version for code object V4.
constexpr uint32_t VersionMinorV4 = 1;
+/// HSA metadata major version for code object V5.
+constexpr uint32_t VersionMajorV5 = 1;
+/// HSA metadata minor version for code object V5.
+constexpr uint32_t VersionMinorV5 = 2;
+
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 433d7fdc2c3b..80deeb2a6e9d 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -328,6 +328,8 @@ ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
ARM::AEK_I8MM))
ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h
index c664ac48daad..6853df3ccab1 100644
--- a/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -35,16 +35,11 @@ public:
llvm::support::endianness Endian);
explicit BinaryStreamReader(StringRef Data, llvm::support::endianness Endian);
- BinaryStreamReader(const BinaryStreamReader &Other)
- : Stream(Other.Stream), Offset(Other.Offset) {}
+ BinaryStreamReader(const BinaryStreamReader &Other) = default;
- BinaryStreamReader &operator=(const BinaryStreamReader &Other) {
- Stream = Other.Stream;
- Offset = Other.Offset;
- return *this;
- }
+ BinaryStreamReader &operator=(const BinaryStreamReader &Other) = default;
- virtual ~BinaryStreamReader() {}
+ virtual ~BinaryStreamReader() = default;
/// Read as much as possible from the underlying string at the current offset
/// without invoking a copy, and set \p Buffer to the resulting data slice.
diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h
index c05b0420aaa3..ce7af3650f52 100644
--- a/llvm/include/llvm/Support/BinaryStreamWriter.h
+++ b/llvm/include/llvm/Support/BinaryStreamWriter.h
@@ -35,16 +35,11 @@ public:
explicit BinaryStreamWriter(MutableArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
- BinaryStreamWriter(const BinaryStreamWriter &Other)
- : Stream(Other.Stream), Offset(Other.Offset) {}
+ BinaryStreamWriter(const BinaryStreamWriter &Other) = default;
- BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) {
- Stream = Other.Stream;
- Offset = Other.Offset;
- return *this;
- }
+ BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) = default;
- virtual ~BinaryStreamWriter() {}
+ virtual ~BinaryStreamWriter() = default;
/// Write the bytes specified in \p Buffer to the underlying stream.
/// On success, updates the offset so that subsequent writes will occur
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 120ab1840915..c8e29ac42559 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -877,7 +877,7 @@ class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
basic_parser_impl(Option &) {}
- virtual ~basic_parser_impl() {}
+ virtual ~basic_parser_impl() = default;
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueRequired;
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index f4c277fae7cc..f3317049524f 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -77,12 +77,21 @@
/// * 1916: VS2017, version 15.9
/// * 1920: VS2019, version 16.0
/// * 1921: VS2019, version 16.1
+/// * 1922: VS2019, version 16.2
+/// * 1923: VS2019, version 16.3
+/// * 1924: VS2019, version 16.4
+/// * 1925: VS2019, version 16.5
+/// * 1926: VS2019, version 16.6
+/// * 1927: VS2019, version 16.7
+/// * 1928: VS2019, version 16.8 + 16.9
+/// * 1929: VS2019, version 16.10 + 16.11
+/// * 1930: VS2022, version 17.0
#ifdef _MSC_VER
#define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version))
-// We require at least MSVC 2017.
-#if !LLVM_MSC_PREREQ(1910)
-#error LLVM requires at least MSVC 2017.
+// We require at least VS 2019.
+#if !LLVM_MSC_PREREQ(1920)
+#error LLVM requires at least VS 2019.
#endif
#else
@@ -94,12 +103,8 @@
/// Sadly, this is separate from just rvalue reference support because GCC
/// and MSVC implemented this later than everything else. This appears to be
/// corrected in MSVC 2019 but not MSVC 2017.
-#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \
- LLVM_MSC_PREREQ(1920)
+/// FIXME: Remove LLVM_HAS_RVALUE_REFERENCE_THIS macro
#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
-#else
-#define LLVM_HAS_RVALUE_REFERENCE_THIS 0
-#endif
/// Expands to '&' if ref-qualifiers for *this are supported.
///
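
For context, LLVM_MSC_PREREQ simply compares _MSC_VER against the table above, so downstream code can gate on the new baseline. A hedged sketch (the guard itself is illustrative, not part of the patch):

  // Reject compilers older than the VS 2019 baseline established above.
  #if defined(_MSC_VER) && !LLVM_MSC_PREREQ(1920)
  #error This code assumes VS 2019 (MSVC 19.20) or newer.
  #endif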
diff --git a/llvm/include/llvm/Support/FileOutputBuffer.h b/llvm/include/llvm/Support/FileOutputBuffer.h
index 17b44380e9cd..d4b73522115d 100644
--- a/llvm/include/llvm/Support/FileOutputBuffer.h
+++ b/llvm/include/llvm/Support/FileOutputBuffer.h
@@ -70,7 +70,7 @@ public:
/// If this object was previously committed, the destructor just deletes
/// this object. If this object was not committed, the destructor
/// deallocates the buffer and the target file is never written.
- virtual ~FileOutputBuffer() {}
+ virtual ~FileOutputBuffer() = default;
/// This removes the temporary file (unless it already was committed)
/// but keeps the memory mapping alive.
diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h
index 2cafc120c1d7..2204cff13a64 100644
--- a/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -24,7 +24,7 @@ class format_adapter {
virtual void anchor();
protected:
- virtual ~format_adapter() {}
+ virtual ~format_adapter() = default;
public:
virtual void format(raw_ostream &S, StringRef Options) = 0;
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index f39400c26eab..d7c64bf62c7a 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -260,7 +260,7 @@ protected:
friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase>;
public:
- DominatorTreeBase() {}
+ DominatorTreeBase() = default;
DominatorTreeBase(DominatorTreeBase &&Arg)
: Roots(std::move(Arg.Roots)),
diff --git a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
index 3bafeb48f64a..96105d6b4684 100644
--- a/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -37,7 +37,7 @@ namespace IDFCalculatorDetail {
/// May be specialized if, for example, one does not want to return null
/// pointer successors.
template <class NodeTy, bool IsPostDom> struct ChildrenGetterTy {
- using NodeRef = typename GraphTraits<NodeTy>::NodeRef;
+ using NodeRef = typename GraphTraits<NodeTy *>::NodeRef;
using ChildrenTy = SmallVector<NodeRef, 8>;
ChildrenTy get(const NodeRef &N);
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 5ef0ba31f785..96b7753e9b20 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -31,7 +31,7 @@ private:
public:
// Default construct Zero and One.
- KnownBits() {}
+ KnownBits() = default;
/// Create a known bits object of BitWidth bits initialized to unknown.
KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {}
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index b450c1df3558..7fa0e6ee3acf 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -92,6 +92,9 @@ private:
void updateFLen();
void updateMinVLen();
void updateMaxELen();
+
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index 9bde4f455a2d..6b5daf710c9f 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -115,7 +115,7 @@ public:
return SP->getKind() == ScopedPrinterKind::Base;
}
- virtual ~ScopedPrinter() {}
+ virtual ~ScopedPrinter() = default;
void flush() { OS.flush(); }
@@ -792,13 +792,13 @@ private:
struct DelimitedScope {
DelimitedScope(ScopedPrinter &W) : W(&W) {}
DelimitedScope() : W(nullptr) {}
- virtual ~DelimitedScope(){};
+ virtual ~DelimitedScope() = default;
virtual void setPrinter(ScopedPrinter &W) = 0;
ScopedPrinter *W;
};
struct DictScope : DelimitedScope {
- explicit DictScope() {}
+ explicit DictScope() = default;
explicit DictScope(ScopedPrinter &W) : DelimitedScope(W) { W.objectBegin(); }
DictScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) {
@@ -817,7 +817,7 @@ struct DictScope : DelimitedScope {
};
struct ListScope : DelimitedScope {
- explicit ListScope() {}
+ explicit ListScope() = default;
explicit ListScope(ScopedPrinter &W) : DelimitedScope(W) { W.arrayBegin(); }
ListScope(ScopedPrinter &W, StringRef N) : DelimitedScope(W) {
diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 352fba511937..162a1de72f1a 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -109,7 +109,7 @@ struct SuffixTreeNode {
SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
: StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
- SuffixTreeNode() {}
+ SuffixTreeNode() = default;
};
/// A data structure for fast substring queries.
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index eb49e805b40d..742d20ce51dd 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -106,7 +106,7 @@ public:
~Timer();
/// Create an uninitialized timer; clients must use 'init'.
- explicit Timer() {}
+ explicit Timer() = default;
void init(StringRef TimerName, StringRef TimerDescription);
void init(StringRef TimerName, StringRef TimerDescription, TimerGroup &tg);
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index add05bd078d6..1157487eced3 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -2015,7 +2015,7 @@ class Resolver {
public:
explicit Resolver(Record *CurRec) : CurRec(CurRec) {}
- virtual ~Resolver() {}
+ virtual ~Resolver() = default;
Record *getCurrentRecord() const { return CurRec; }
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 03ead4bc0714..072ccf7320e8 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_AGGRESSIVEINSTCOMBINE_AGGRESSIVEINSTCOMBINE_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
index 6a208dfa6a25..78b2f909f1c9 100644
--- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 6d6cb58abdbb..225def99678a 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -27,14 +27,6 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
public:
ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
- /// Check if callers and the callee \p F agree how promoted arguments would be
- /// passed. The ones that they do not agree on are eliminated from the sets but
- /// the return value has to be observed as well.
- static bool areFunctionArgsABICompatible(
- const Function &F, const TargetTransformInfo &TTI,
- SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
-
/// Checks if a type could have padding bytes.
static bool isDenselyPacked(Type *type, const DataLayout &DL);
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index d56a43ec7961..7eee16f71d64 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -132,6 +132,7 @@ struct AbstractAttribute;
struct InformationCache;
struct AAIsDead;
struct AttributorCallGraph;
+struct IRPosition;
class AAResults;
class Function;
@@ -139,6 +140,11 @@ class Function;
/// Abstract Attribute helper functions.
namespace AA {
+/// Return true if \p I is a `nosync` instruction. Use generic reasoning and
+/// potentially the corresponding AANoSync.
+bool isNoSyncInst(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA);
+
/// Return true if \p V is dynamically unique, that is, there are no two
/// "instances" of \p V at runtime with different values.
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
@@ -185,7 +191,8 @@ Constant *getInitialValueForObj(Value &Obj, Type &Ty,
bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
- const Instruction *CtxI);
+ const Instruction *CtxI,
+ bool Intraprocedural = false);
/// Collect all potential values of the one stored by \p SI into
/// \p PotentialCopies. That is, the only copies that were made via the
@@ -200,6 +207,34 @@ bool getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
+/// Return true if \p IRP is readonly. This will query respective AAs that
+/// deduce the information and introduce dependences for \p QueryingAA.
+bool isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown);
+
+/// Return true if \p IRP is readnone. This will query respective AAs that
+/// deduce the information and introduce dependences for \p QueryingAA.
+bool isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown);
+
+/// Return true if \p ToI is potentially reachable from \p FromI. The two
+/// instructions do not need to be in the same function. \p GoBackwardsCB
+/// can be provided to convey domain knowledge about the "lifespan" the user is
+/// interested in. By default, the callers of \p FromI are checked as well to
+/// determine if \p ToI can be reached. If the query is not interested in
+/// callers beyond a certain point, e.g., a GPU kernel entry or the function
+/// containing an alloca, the \p GoBackwardsCB should return false.
+bool isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Instruction &ToI,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB = nullptr);
+
+/// Same as above but it is sufficient to reach any instruction in \p ToFn.
+bool isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB);
+
} // namespace AA
/// The value passed to the line option that defines the maximal initialization
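
A usage sketch for the AA::isPotentiallyReachable query declared above; A, FromI, ToI, and QueryingAA are assumed to be in scope, and the "kernel" attribute check is purely illustrative:

  // Stop the backwards walk at hypothetical kernel entry points, since
  // callers beyond them are outside the lifespan we care about.
  bool MayReach = AA::isPotentiallyReachable(
      A, FromI, ToI, QueryingAA,
      /*GoBackwardsCB=*/[](const Function &F) {
        return !F.hasFnAttribute("kernel");
      });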
@@ -227,7 +262,7 @@ enum class DepClassTy {
/// The data structure for the nodes of a dependency graph
struct AADepGraphNode {
public:
- virtual ~AADepGraphNode(){};
+ virtual ~AADepGraphNode() = default;
using DepTy = PointerIntPair<AADepGraphNode *, 1>;
protected:
@@ -266,8 +301,8 @@ public:
/// then it means that B depends on A, and when the state of A is
/// updated, node B should also be updated
struct AADepGraph {
- AADepGraph() {}
- ~AADepGraph() {}
+ AADepGraph() = default;
+ ~AADepGraph() = default;
using DepTy = AADepGraphNode::DepTy;
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
@@ -334,6 +369,14 @@ struct IRPosition {
return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
}
+ /// Create a position describing the instruction \p I. This is different from
+  /// the value version because call sites are treated as instructions rather
+ /// than their return value in this function.
+ static const IRPosition inst(const Instruction &I,
+ const CallBaseContext *CBContext = nullptr) {
+ return IRPosition(const_cast<Instruction &>(I), IRP_FLOAT, CBContext);
+ }
+
/// Create a position describing the function scope of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition function(const Function &F,
@@ -662,7 +705,7 @@ private:
break;
case IRPosition::IRP_FLOAT:
// Special case for floating functions.
- if (isa<Function>(AnchorVal))
+ if (isa<Function>(AnchorVal) || isa<CallBase>(AnchorVal))
Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
else
Enc = {&AnchorVal, ENC_VALUE};
@@ -844,7 +887,7 @@ struct AnalysisGetter {
}
AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
- AnalysisGetter() {}
+ AnalysisGetter() = default;
private:
FunctionAnalysisManager *FAM = nullptr;
@@ -879,7 +922,7 @@ struct InformationCache {
[&](const Function &F) {
return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
}),
- AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
+ AG(AG), TargetTriple(M.getTargetTriple()) {
if (CGSCC)
initializeModuleSlice(*CGSCC);
}
@@ -996,13 +1039,6 @@ struct InformationCache {
return AG.getAnalysis<AP>(F);
}
- /// Return SCC size on call graph for function \p F or 0 if unknown.
- unsigned getSccSize(const Function &F) {
- if (CGSCC && CGSCC->count(const_cast<Function *>(&F)))
- return CGSCC->size();
- return 0;
- }
-
/// Return datalayout used in the module.
const DataLayout &getDL() { return DL; }
@@ -1092,9 +1128,6 @@ private:
/// Getters for analysis.
AnalysisGetter &AG;
- /// The underlying CGSCC, or null if not available.
- SetVector<Function *> *CGSCC;
-
/// Set of inlineable functions
SmallPtrSet<const Function *, 8> InlineableFunctions;
@@ -1362,6 +1395,9 @@ struct Attributor {
return AA;
}
+ /// Allows a query AA to request an update if a new query was received.
+ void registerForUpdate(AbstractAttribute &AA);
+
/// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
/// \p FromAA changes \p ToAA should be updated as well.
///
@@ -1794,6 +1830,18 @@ public:
const AbstractAttribute &QueryingAA,
bool RequireAllCallSites, bool &AllCallSitesKnown);
+ /// Check \p Pred on all call sites of \p Fn.
+ ///
+ /// This method will evaluate \p Pred on call sites and return
+  /// true if \p Pred holds at every call site. However, this is only possible
+  /// if all call sites are known, which is the case if the function has
+  /// internal linkage.
+ /// If true is returned, \p AllCallSitesKnown is set if all possible call
+ /// sites of the function have been visited.
+ bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
+ const Function &Fn, bool RequireAllCallSites,
+ const AbstractAttribute *QueryingAA,
+ bool &AllCallSitesKnown);
+
/// Check \p Pred on all values potentially returned by \p F.
///
/// This method will evaluate \p Pred on all values potentially returned by
@@ -1932,18 +1980,6 @@ private:
/// may trigger further updates. (\see DependenceStack)
void rememberDependences();
- /// Check \p Pred on all call sites of \p Fn.
- ///
- /// This method will evaluate \p Pred on call sites and return
- /// true if \p Pred holds in every call sites. However, this is only possible
- /// all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
- bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
- const Function &Fn, bool RequireAllCallSites,
- const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown);
-
/// Determine if CallBase context in \p IRP should be propagated.
bool shouldPropagateCallBaseContext(const IRPosition &IRP);
@@ -2056,6 +2092,10 @@ private:
/// Callback to get an OptimizationRemarkEmitter from a Function *.
Optional<OptimizationRemarkGetter> OREGetter;
+ /// Container with all the query AAs that requested an update via
+ /// registerForUpdate.
+ SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate;
+
/// The name of the pass to emit remarks for.
const char *PassName = "";
@@ -2081,7 +2121,7 @@ private:
/// additional methods to directly modify the state based if needed. See the
/// class comments for help.
struct AbstractState {
- virtual ~AbstractState() {}
+ virtual ~AbstractState() = default;
/// Return if this abstract state is in a valid state. If false, no
/// information provided should be used.
@@ -2122,7 +2162,7 @@ template <typename base_ty, base_ty BestState, base_ty WorstState>
struct IntegerStateBase : public AbstractState {
using base_t = base_ty;
- IntegerStateBase() {}
+ IntegerStateBase() = default;
IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}
/// Return the best possible representable state.
@@ -2365,7 +2405,7 @@ struct BooleanState : public IntegerStateBase<bool, true, false> {
using super = IntegerStateBase<bool, true, false>;
using base_t = IntegerStateBase::base_t;
- BooleanState() {}
+ BooleanState() = default;
BooleanState(base_t Assumed) : super(Assumed) {}
/// Set the assumed value to \p Value but never below the known one.
@@ -2773,7 +2813,7 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
/// Virtual destructor.
- virtual ~AbstractAttribute() {}
+ virtual ~AbstractAttribute() = default;
/// This function is used to identify if an \p DGN is of type
/// AbstractAttribute so that the dyn_cast and cast can use such information
@@ -2793,6 +2833,14 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
/// in the `updateImpl` method.
virtual void initialize(Attributor &A) {}
+  /// A query AA is always scheduled as long as we do updates because it does
+  /// lazy computation that cannot be determined to be done from the outside.
+  /// However, while query AAs will not be fixed even if they do not have
+  /// outstanding dependences, we will only schedule them like other AAs. If a
+  /// query AA receives a new query, it needs to request an update via
+  /// `Attributor::registerForUpdate`.
+ virtual bool isQueryAA() const { return false; }
+
/// Return the internal abstract state for inspection.
virtual StateType &getState() = 0;
virtual const StateType &getState() const = 0;
@@ -2989,6 +3037,14 @@ struct AANoSync
/// Returns true if "nosync" is known.
bool isKnownNoSync() const { return getKnown(); }
+  /// Helper function used to determine whether an instruction is non-relaxed
+  /// atomic. In other words, it returns true if an atomic instruction does
+  /// not have unordered or monotonic ordering.
+ static bool isNonRelaxedAtomic(const Instruction *I);
+
+  /// Helper function specific to intrinsics that are potentially volatile.
+ static bool isNoSyncIntrinsic(const Instruction *I);
+
/// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -4419,7 +4475,7 @@ private:
struct AACallGraphNode {
AACallGraphNode(Attributor &A) : A(A) {}
- virtual ~AACallGraphNode() {}
+ virtual ~AACallGraphNode() = default;
virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;
@@ -4485,7 +4541,7 @@ struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
// Synthetic root node for the Attributor's internal call graph.
struct AttributorCallGraph : public AACallGraphNode {
AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
- virtual ~AttributorCallGraph() {}
+ virtual ~AttributorCallGraph() = default;
AACallEdgeIterator optimisticEdgesBegin() const override {
return AACallEdgeIterator(A, A.Functions.begin());
@@ -4592,18 +4648,30 @@ struct AAFunctionReachability
AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// See AbstractAttribute::isQueryAA.
+ bool isQueryAA() const override { return true; }
+
/// Return true if the function represented by this position can reach \p Fn.
- virtual bool canReach(Attributor &A, Function *Fn) const = 0;
+ virtual bool canReach(Attributor &A, const Function &Fn) const = 0;
+
+ /// Can \p CB reach \p Fn.
+ virtual bool canReach(Attributor &A, CallBase &CB,
+ const Function &Fn) const = 0;
- /// Can \p CB reach \p Fn
- virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0;
+ /// Can \p Inst reach \p Fn.
+ /// See also AA::isPotentiallyReachable.
+ virtual bool instructionCanReach(Attributor &A, const Instruction &Inst,
+ const Function &Fn,
+ bool UseBackwards = true) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
- const std::string getName() const override { return "AAFuncitonReacability"; }
+ const std::string getName() const override {
+ return "AAFunctionReachability";
+ }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
@@ -4639,21 +4707,12 @@ struct AAPointerInfo : public AbstractAttribute {
AccessKind Kind, Type *Ty)
: LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
Ty(Ty) {}
- Access(const Access &Other)
- : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
- Kind(Other.Kind), Ty(Other.Ty) {}
+ Access(const Access &Other) = default;
Access(const Access &&Other)
: LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
Kind(Other.Kind), Ty(Other.Ty) {}
- Access &operator=(const Access &Other) {
- LocalI = Other.LocalI;
- RemoteI = Other.RemoteI;
- Content = Other.Content;
- Kind = Other.Kind;
- Ty = Other.Ty;
- return *this;
- }
+ Access &operator=(const Access &Other) = default;
bool operator==(const Access &R) const {
return LocalI == R.LocalI && RemoteI == R.RemoteI &&
Content == R.Content && Kind == R.Kind;
@@ -4741,6 +4800,15 @@ struct AAPointerInfo : public AbstractAttribute {
virtual bool forallInterferingAccesses(
StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
+ /// Call \p CB on all write accesses that might interfere with \p LI and
+ /// return true if all such accesses were known and the callback returned true
+  /// for all of them, false otherwise. In contrast to forallInterferingAccesses,
+  /// this function will perform reasoning to exclude write accesses that cannot
+  /// affect the load even if, on the surface, they look as if they would.
+ virtual bool forallInterferingWrites(
+ Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ function_ref<bool(const Access &, bool)> CB) const = 0;
+
/// This function should return true if the type of the \p AA is AAPointerInfo
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
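
A sketch of how a caller might drive the new forallInterferingWrites interface; PI, A, QueryingAA, and LI are assumed to be in scope, and the benign-write predicate is purely illustrative:

  // Visit every write that might interfere with the load. Returning false
  // from the callback aborts the walk, and the overall result is true only
  // if all interfering writes were known and accepted.
  bool AllWritesBenign = PI.forallInterferingWrites(
      A, QueryingAA, LI,
      [&](const AAPointerInfo::Access &Acc, bool IsExact) {
        return !Acc.isWrite() || IsExact;
      });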
diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index fd99843d0449..a2b93f8aa30d 100644
--- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -14,6 +14,7 @@
#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index ed74c8ed0e96..e4807a1c9c65 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -337,11 +337,9 @@ private:
/// be analyzed for similarity. This is needed as there may be instructions we
/// can identify as having similarity, but which are more complicated to outline.
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
- InstructionAllowed() {}
+ InstructionAllowed() = default;
- bool visitBranchInst(BranchInst &BI) {
- return EnableBranches;
- }
+ bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
bool visitPHINode(PHINode &PN) { return EnableBranches; }
// TODO: Handle allocas.
bool visitAllocaInst(AllocaInst &AI) { return false; }
@@ -359,7 +357,7 @@ private:
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
// TODO: Handle specific intrinsics individually from those that can be
// handled.
- bool IntrinsicInst(IntrinsicInst &II) { return false; }
+ bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
// We only handle CallInsts that are not indirect, since we cannot guarantee
// that they have a name in these cases.
bool visitCallInst(CallInst &CI) {
@@ -395,6 +393,10 @@ private:
// The flag variable that marks whether we should allow indirect calls
// to be outlined.
bool EnableIndirectCalls = true;
+
+  // The flag variable that marks whether we should allow intrinsic
+  // instructions to be outlined.
+ bool EnableIntrinsics = false;
};
/// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index bb7907fb8ac8..302695d96355 100644
--- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -17,6 +17,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index 2b05aaf320cf..704b793ab3ea 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 43f4bc78140f..e73c36043cb2 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -154,7 +154,7 @@ class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
public:
- PseudoProbeUpdatePass() {}
+ PseudoProbeUpdatePass() = default;
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index f8cb6dc73a6f..ae19fbfb49a7 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -93,7 +93,7 @@ public:
MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
- virtual ~InstCombiner() {}
+ virtual ~InstCombiner() = default;
/// Return the source operand of a potentially bitcasted value while
/// optionally checking if it has one use. If there is no bitcast or the one
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index 6002f0270083..a0d8118c23f7 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -16,6 +16,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 5a0fb835606a..0a5456c5956f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -66,7 +66,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
}
SmallVector<Instruction *, 8> ReachableRetVec;
unsigned NumCoveredExits = 0;
- for (auto &RI : RetVec) {
+ for (auto *RI : RetVec) {
if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
continue;
ReachableRetVec.push_back(RI);
@@ -83,7 +83,7 @@ bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
for (auto *End : Ends)
Callback(End);
} else {
- for (auto &RI : ReachableRetVec)
+ for (auto *RI : ReachableRetVec)
Callback(RI);
// We may have inserted untag outside of the lifetime interval.
// Signal the caller to remove the lifetime end call for this alloca.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 8d70f1429b99..76d586252743 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_BOUNDSCHECKING_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index 3118a3762935..70949026a892 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 64523d7d073c..5873db22a5d1 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -87,21 +87,32 @@ private:
/// Count the number of instrumented value sites for the function.
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
- /// Replace instrprof_value_profile with a call to runtime library.
+  /// Replace instrprof.value.profile with a call to the runtime library.
void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
- /// Replace instrprof_increment with an increment of the appropriate value.
+ /// Replace instrprof.cover with a store instruction to the coverage byte.
+ void lowerCover(InstrProfCoverInst *Inc);
+
+ /// Replace instrprof.increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
/// Force emitting of name vars for unused functions.
void lowerCoverageData(GlobalVariable *CoverageNamesVar);
+ /// Compute the address of the counter value that this profiling instruction
+ /// acts on.
+ Value *getCounterAddress(InstrProfInstBase *I);
+
/// Get the region counters for an increment, creating them if necessary.
///
/// If the counter array doesn't yet exist, the profile data variables
/// referring to them will also be created.
GlobalVariable *getOrCreateRegionCounters(InstrProfInstBase *Inc);
+ /// Create the region counters.
+ GlobalVariable *createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage);
+
/// Emit the section with compressed function names.
void emitNameData();
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index f4d1b1d90e6f..b9ad56ba7509 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -15,6 +15,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 7ba9d65cae55..e83cc2b9bef0 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -73,7 +73,7 @@ class PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>> {
public:
- explicit PassManager() {}
+ explicit PassManager() = default;
// FIXME: These are equivalent to the default move constructor/move
// assignment. However, using = default triggers linker errors due to the
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
index a5ad4a2192a0..61c7bf0454e1 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerConstantIntrinsics.h
@@ -23,7 +23,7 @@ namespace llvm {
struct LowerConstantIntrinsicsPass :
PassInfoMixin<LowerConstantIntrinsicsPass> {
public:
- explicit LowerConstantIntrinsicsPass() {}
+ explicit LowerConstantIntrinsicsPass() = default;
/// Run the pass over the function.
///
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 81363130e2e3..f4472e699295 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 04a5f7e6ff38..64691d68b1c4 100644
--- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -14,6 +14,7 @@
#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class Function;
@@ -22,7 +23,7 @@ class Function;
class WarnMissedTransformationsPass
: public PassInfoMixin<WarnMissedTransformationsPass> {
public:
- explicit WarnMissedTransformationsPass() {}
+ explicit WarnMissedTransformationsPass() = default;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
index a497722eece6..d679bca69510 100644
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -20,6 +20,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
class AssumptionCache;
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 8970afb3aeaa..d99b2a56559d 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -46,9 +46,9 @@ class Value;
/// instruction. If \p Updates is specified, collect all necessary DT updates
/// into this vector. If \p KeepOneInputPHIs is true, one-input Phis in
/// successors of blocks being deleted will be preserved.
-void DetatchDeadBlocks(ArrayRef <BasicBlock *> BBs,
- SmallVectorImpl<DominatorTree::UpdateType> *Updates,
- bool KeepOneInputPHIs = false);
+void detachDeadBlocks(ArrayRef <BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs = false);
/// Delete the specified block, which must have no predecessors.
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
index f8211d60938e..e12d7e09aad6 100644
--- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h
@@ -53,7 +53,7 @@ class CallGraphUpdater {
///}
public:
- CallGraphUpdater() {}
+ CallGraphUpdater() = default;
~CallGraphUpdater() { finalize(); }
/// Initializers for usage outside of a CGSCC pass, inside a CGSCC pass in
diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h
index 0f1c7ec724df..892e354cd9ed 100644
--- a/llvm/include/llvm/Transforms/Utils/Debugify.h
+++ b/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -21,6 +21,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
using DebugFnMap = llvm::MapVector<llvm::StringRef, const llvm::DISubprogram *>;
using DebugInstMap = llvm::MapVector<const llvm::Instruction *, bool>;
diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
index 84e4fee51c26..af9cdb9fd619 100644
--- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
+++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -15,6 +15,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
namespace llvm {
class InjectTLIMappings : public PassInfoMixin<InjectTLIMappings> {
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 7b6595c192de..07dabaeaa907 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -21,7 +21,7 @@ namespace llvm {
bool canPeel(Loop *L);
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
+ DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA);
TargetTransformInfo::PeelingPreferences
gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 9bbe8ea7e1e8..8d459972336b 100644
--- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
#include <utility> // for std::pair
namespace llvm {
@@ -106,6 +107,10 @@ void filterDeadComdatFunctions(
/// unique identifier for this module, so we return the empty string.
std::string getUniqueModuleId(Module *M);
+/// Embed the memory buffer \p Buf into the module \p M as a global using the
+/// specified section name.
+void embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName);
+
class CallInst;
namespace VFABI {
/// Overwrite the Vector Function ABI variants attribute with the names provide
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 5de575aed059..ad24cb454d5e 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -42,7 +42,7 @@ class SSAUpdaterBulk {
SmallVector<Use *, 4> Uses;
StringRef Name;
Type *Ty;
- RewriteInfo(){};
+ RewriteInfo() = default;
RewriteInfo(StringRef &N, Type *T) : Name(N), Ty(T){};
};
SmallVector<RewriteInfo, 4> Rewrites;
@@ -52,10 +52,10 @@ class SSAUpdaterBulk {
Value *computeValueAt(BasicBlock *BB, RewriteInfo &R, DominatorTree *DT);
public:
- explicit SSAUpdaterBulk(){};
+ explicit SSAUpdaterBulk() = default;
SSAUpdaterBulk(const SSAUpdaterBulk &) = delete;
SSAUpdaterBulk &operator=(const SSAUpdaterBulk &) = delete;
- ~SSAUpdaterBulk(){};
+ ~SSAUpdaterBulk() = default;
/// Add a new variable to the SSA rewriter. This needs to be called before
/// AddAvailableValue or AddUse calls. The return value is the variable ID,
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
index f72c76c6f0f2..3636285e38f5 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOADSTOREVECTORIZER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index 25c7aeee148e..d0693ccfd8f6 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -60,6 +60,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/DynamicTags.def"
textual header "BinaryFormat/MachO.def"
textual header "BinaryFormat/MinidumpConstants.def"
+ textual header "BinaryFormat/Swift.def"
textual header "BinaryFormat/ELFRelocs/AArch64.def"
textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
textual header "BinaryFormat/ELFRelocs/ARM.def"
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index b4c985962837..0a0b53796add 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1010,10 +1010,13 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
return ModRefInfo::NoModRef;
}
- // The semantics of memcpy intrinsics either exactly overlap or do not
- // overlap, i.e., source and destination of any given memcpy are either
- // no-alias or must-alias.
- if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
+  // Ideally, there should be no need to special-case memcpy/memmove
+  // intrinsics here, since the general machinery (based on memory attributes)
+  // should already handle them just fine. Unfortunately, it doesn't, due to a
+  // deficiency in operand bundle support. At the moment it's not clear whether
+  // the complexity of enhancing the general mechanism is worth it.
+  // TODO: Consider improving operand bundle support in the general mechanism.
+ if (auto *Inst = dyn_cast<AnyMemTransferInst>(Call)) {
AliasResult SrcAA =
getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc, AAQI);
AliasResult DestAA =
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index d2f0c57f6dab..01681c47418a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -39,6 +39,10 @@ cl::opt<bool>
MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
cl::desc("only allow matching call instructions if the "
"name and type signature match."));
+
+cl::opt<bool>
+ DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
+ cl::desc("Don't match or outline intrinsics"));
} // namespace llvm
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
@@ -109,6 +113,24 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
assert(CI && "Instruction must be call");
CalleeName = "";
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+  // To hash intrinsics, we use the opcode and types like the other
+  // instructions, but also the intrinsic ID and the name of the
+  // intrinsic.
+ Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+ FunctionType *FT = II->getFunctionType();
+ // If there is an overloaded name, we have to use the complex version
+ // of getName to get the entire string.
+ if (Intrinsic::isOverloaded(IntrinsicID))
+ CalleeName =
+ Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
+ // If there is not an overloaded name, we only need to use this version.
+ else
+ CalleeName = Intrinsic::getName(IntrinsicID).str();
+
+ return;
+ }
+
if (!CI->isIndirectCall() && MatchByName)
CalleeName = CI->getCalledFunction()->getName().str();
}
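
Illustrative values (not from the patch) for the two Intrinsic::getName forms used above; overloaded intrinsics need the parameter types to recover the fully mangled name, non-overloaded ones do not:

  // Overloaded: the mangled suffix depends on the concrete types, e.g.
  //   Intrinsic::getName(Intrinsic::memcpy, FT->params(), M, FT)
  //     == "llvm.memcpy.p0i8.p0i8.i64"  (one common instantiation)
  // Not overloaded: a fixed name suffices, e.g.
  //   Intrinsic::getName(Intrinsic::trap) == "llvm.trap"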
@@ -232,7 +254,7 @@ bool IRSimilarity::isClose(const IRInstructionData &A,
// name is the same. We already know that the types are, since
// isSameOperationAs is true.
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
- if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0)
+ if (A.getCalleeName().str() != B.getCalleeName().str())
return false;
}
@@ -1139,6 +1161,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+ Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
@@ -1151,6 +1174,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+ Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -1172,7 +1196,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName));
+ MatchCallsByName, !DisableIntrinsics));
return false;
}
@@ -1189,9 +1213,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
ModuleAnalysisManager &) {
-
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName);
+ MatchCallsByName, !DisableIntrinsics);
IRSI.findSimilarity(M);
return IRSI;
}
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index b71b39334ace..4775340b3438 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -951,7 +951,7 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
// X / undef -> poison
// X % undef -> poison
- if (Q.isUndefValue(Op1))
+ if (Q.isUndefValue(Op1) || isa<PoisonValue>(Op1))
return PoisonValue::get(Ty);
// X / 0 -> poison
@@ -2418,6 +2418,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
return C;
+ // X ^ poison -> poison
+ if (isa<PoisonValue>(Op1))
+ return Op1;
+
// A ^ undef -> undef
if (Q.isUndefValue(Op1))
return Op1;
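
A hedged caller-side sketch of the new poison rule, assuming a Value *X and a SimplifyQuery Q are in scope:

  // With the hunk above, xor with a poison operand folds to poison
  // outright instead of relying on the undef rule below it.
  Value *Folded = SimplifyXorInst(X, PoisonValue::get(X->getType()), Q);
  // Folded is the poison operand itself (the second argument).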
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 0fbf1db0685d..cd0d4d6b9ca8 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -208,7 +208,7 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- MaybeAlign MA,
+ Align Alignment,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT,
@@ -223,8 +223,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
// determine the exact offset to the attributed variable, we can use that
// information here.
- // Require ABI alignment for loads without alignment specification
- const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()),
DL.getTypeStoreSize(Ty));
return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index dd6958716127..b161c490a6bc 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1107,6 +1107,10 @@ int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
}
+bool llvm::isFinite(const Loop *L) {
+ return L->getHeader()->getParent()->willReturn();
+}
+
static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::hasMustProgress(const Loop *L) {
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
index 30937a2e4931..82617c7256a5 100644
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -59,8 +59,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- MaybeAlign(LI->getAlign()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(),
+ DL))
DerefAndAligned.insert(PO);
}
}
@@ -94,8 +94,8 @@ PreservedAnalyses MemDerefPrinterPass::run(Function &F,
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- MaybeAlign(LI->getAlign()), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(),
+ DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 07aac1523b47..977fc0911355 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3486,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
return S;
}
-const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getAPInt().abs();
APInt B = C2->getAPInt().abs();
uint32_t ABW = A.getBitWidth();
@@ -7017,7 +7017,7 @@ bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
// A mustprogress loop without side effects must be finite.
// TODO: The check used here is very conservative. It's only *specific*
// side effects which are well defined in infinite loops.
- return isMustProgress(L) && loopHasNoSideEffects(L);
+ return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L));
}
const SCEV *ScalarEvolution::createSCEV(Value *V) {
@@ -8466,8 +8466,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
Pred = ICmpInst::getSwappedPredicate(Pred);
}
+ bool ControllingFiniteLoop =
+ ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
// Simplify the operands before analyzing them.
- (void)SimplifyICmpOperands(Pred, LHS, RHS);
+ (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
+ ControllingFiniteLoop);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
@@ -8487,9 +8490,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
// the same values on self-wrap of the IV, then we can infer that IV
// doesn't self wrap because if it did, we'd have an infinite (undefined)
// loop.
- if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) &&
- loopIsFiniteByAssumption(L)) {
-
+ if (ControllingFiniteLoop && isLoopInvariant(RHS, L)) {
// TODO: We can peel off any functions which are invertible *in L*. Loop
// invariant terms are effectively constants for our purposes here.
auto *InnerLHS = LHS;
@@ -9940,7 +9941,8 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
- unsigned Depth) {
+ unsigned Depth,
+ bool ControllingFiniteLoop) {
bool Changed = false;
// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
// '0 != 0'.
@@ -10069,10 +10071,15 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
- // adding or subtracting 1 from one of the operands.
+ // adding or subtracting 1 from one of the operands. This can be done for
+ // one of two reasons:
+ // 1) The range of the RHS does not include the (signed/unsigned) boundaries
+ // 2) The loop is finite, with this comparison controlling the exit. Since the
+ // loop is finite, the bound cannot include the corresponding boundary
+ // (otherwise it would loop forever).
switch (Pred) {
case ICmpInst::ICMP_SLE:
- if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
+ if (ControllingFiniteLoop || !getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
@@ -10085,7 +10092,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_SGE:
- if (!getSignedRangeMin(RHS).isMinSignedValue()) {
+ if (ControllingFiniteLoop || !getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
@@ -10098,7 +10105,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_ULE:
- if (!getUnsignedRangeMax(RHS).isMaxValue()) {
+ if (ControllingFiniteLoop || !getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
@@ -10110,7 +10117,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_UGE:
- if (!getUnsignedRangeMin(RHS).isMinValue()) {
+ if (ControllingFiniteLoop || !getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
@@ -10130,7 +10137,8 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
- return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1,
+ ControllingFiniteLoop);
return Changed;
}
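
A worked instance of the ICMP_ULE case, showing why the finite-loop justification is sound:

  // When this compare controls the exit of a finite loop:
  //   (i <=u N)  ==>  (i <u N + 1)
  // The rewrite is only wrong when N == UINT_MAX, but then `i <=u N` holds
  // for every i and the loop never exits, contradicting finiteness; so the
  // boundary case cannot occur under assumption (2) above.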
@@ -10911,7 +10919,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
// For unsigned and equality predicates, try to prove that both found
// operands fit into narrow unsigned range. If so, try to prove facts in
// narrow types.
- if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy()) {
+ if (!CmpInst::isSigned(FoundPred) && !FoundLHS->getType()->isPointerTy() &&
+ !FoundRHS->getType()->isPointerTy()) {
auto *NarrowType = LHS->getType();
auto *WideType = FoundLHS->getType();
auto BitWidth = getTypeSizeInBits(NarrowType);
@@ -10929,7 +10938,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
}
- if (LHS->getType()->isPointerTy())
+ if (LHS->getType()->isPointerTy() || RHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(Pred)) {
LHS = getSignExtendExpr(LHS, FoundLHS->getType());
@@ -10940,7 +10949,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
} else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
- if (FoundLHS->getType()->isPointerTy())
+ if (FoundLHS->getType()->isPointerTy() || FoundRHS->getType()->isPointerTy())
return false;
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 23dbb32f38de..627a78a2a2fd 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -786,3 +786,36 @@ MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) {
}
return MDNode::get(MD->getContext(), Sub);
}
+
+MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
+ // Fast path if 0-length
+ if (Len == 0)
+ return nullptr;
+
+ // Regular TBAA is invariant of length, so we only need to consider
+ // struct-path TBAA.
+ if (!isStructPathTBAA(MD))
+ return MD;
+
+ TBAAStructTagNode Tag(MD);
+
+ // Only new format TBAA has a size
+ if (!Tag.isNewFormat())
+ return MD;
+
+ // If unknown size, drop the TBAA.
+ if (Len == -1)
+ return nullptr;
+
+ // Otherwise, create TBAA with the new Len
+ SmallVector<Metadata *, 4> NextNodes(MD->operands());
+ ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]);
+
+ // Don't create a new MDNode if it is the same length.
+ if (PreviousSize->equalsInt(Len))
+ return MD;
+
+ NextNodes[3] =
+ ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len));
+ return MDNode::get(MD->getContext(), NextNodes);
+}
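The control flow of extendToTBAA above reduces to a small decision ladder. Here it is as a self-contained sketch over plain values; ExtendResult, classifyExtend, and the boolean parameters are invented stand-ins for the MDNode queries:

#include <cstddef>

enum class ExtendResult { DropTBAA, KeepAsIs, RebuildWithNewLen };

ExtendResult classifyExtend(bool IsStructPath, bool IsNewFormat,
                            std::ptrdiff_t Len, std::ptrdiff_t PrevLen) {
  if (Len == 0)
    return ExtendResult::DropTBAA;        // zero-length access: no metadata
  if (!IsStructPath || !IsNewFormat)
    return ExtendResult::KeepAsIs;        // only new-format struct-path TBAA
                                          // carries a size operand
  if (Len == -1)
    return ExtendResult::DropTBAA;        // unknown size: be conservative
  if (PrevLen == Len)
    return ExtendResult::KeepAsIs;        // same length: reuse the node
  return ExtendResult::RebuildWithNewLen; // otherwise patch operand 3
}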
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 34358739f9a8..c14bdb8bc262 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4559,8 +4559,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(
- LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlign()), DL,
- CtxI, DT, TLI);
+ LI->getPointerOperand(), LI->getType(), LI->getAlign(), DL, CtxI, DT,
+ TLI);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 99d2c8221281..0d28d93c93c0 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -117,15 +117,28 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
.Case("image", true)
.Case("pipe", true)
.Case("queue", true)
+ .Case("hidden_block_count_x", true)
+ .Case("hidden_block_count_y", true)
+ .Case("hidden_block_count_z", true)
+ .Case("hidden_group_size_x", true)
+ .Case("hidden_group_size_y", true)
+ .Case("hidden_group_size_z", true)
+ .Case("hidden_remainder_x", true)
+ .Case("hidden_remainder_y", true)
+ .Case("hidden_remainder_z", true)
.Case("hidden_global_offset_x", true)
.Case("hidden_global_offset_y", true)
.Case("hidden_global_offset_z", true)
+ .Case("hidden_grid_dims", true)
.Case("hidden_none", true)
.Case("hidden_printf_buffer", true)
.Case("hidden_hostcall_buffer", true)
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
.Case("hidden_multigrid_sync_arg", true)
+ .Case("hidden_private_base", true)
+ .Case("hidden_shared_base", true)
+ .Case("hidden_queue_ptr", true)
.Default(false);
}))
return false;
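The verifier hunk only grows an allow-list of hidden kernel-argument kinds. A rough standard-library equivalent of the StringSwitch pattern, with an abbreviated list (illustrative only):

#include <string>
#include <unordered_set>

bool isValidHiddenArgKind(const std::string &Kind) {
  static const std::unordered_set<std::string> Allowed = {
      "hidden_block_count_x", "hidden_block_count_y", "hidden_block_count_z",
      "hidden_group_size_x",  "hidden_group_size_y",  "hidden_group_size_z",
      "hidden_remainder_x",   "hidden_remainder_y",   "hidden_remainder_z",
      "hidden_grid_dims",     "hidden_private_base",  "hidden_shared_base",
      "hidden_queue_ptr", // ...plus the pre-existing kinds elided here
  };
  return Allowed.count(Kind) != 0;
}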
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index eb4e09ea3a26..4bba0b356675 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4669,7 +4669,7 @@ void IndexBitcodeWriter::write() {
// where it will be written in a new bitcode block. This is used when
// writing the combined index file for ThinLTO. When writing a subset of the
// index for a distributed backend, provide a \p ModuleToSummariesForIndex map.
-void llvm::WriteIndexToFile(
+void llvm::writeIndexToFile(
const ModuleSummaryIndex &Index, raw_ostream &Out,
const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
SmallVector<char, 0> Buffer;
@@ -4829,7 +4829,7 @@ void BitcodeWriter::writeThinLinkBitcode(const Module &M,
// Write the specified thin link bitcode file to the given raw output stream,
// where it will be written in a new bitcode block. This is used when
// writing the per-module index file for ThinLTO.
-void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
+void llvm::writeThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
const ModuleSummaryIndex &Index,
const ModuleHash &ModHash) {
SmallVector<char, 0> Buffer;
@@ -4881,7 +4881,7 @@ static const char *getSectionNameForCommandline(const Triple &T) {
llvm_unreachable("Unimplemented ObjectFormatType");
}
-void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
+void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
bool EmbedBitcode, bool EmbedCmdline,
const std::vector<uint8_t> &CmdArgs) {
// Save llvm.compiler.used and remove it.
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e8fef505e43d..cdf5586766da 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -585,7 +585,7 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
// goes, they shouldn't affect whether the call is a tail call.
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull}) {
+ Attribute::NonNull, Attribute::NoUndef}) {
CallerAttrs.removeAttribute(Attr);
CalleeAttrs.removeAttribute(Attr);
}
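The Analysis.cpp hunk adds NoUndef to the set of attributes ignored for tail-call compatibility, since these describe value properties rather than ABI. A toy version of the strip-then-compare pattern, using std::set in place of AttributeList:

#include <set>
#include <string>

bool attrsCompatibleForTailCall(std::set<std::string> Caller,
                                std::set<std::string> Callee) {
  for (const char *Attr : {"align", "dereferenceable",
                           "dereferenceable_or_null", "noalias", "nonnull",
                           "noundef"}) {
    Caller.erase(Attr);
    Callee.erase(Attr);
  }
  return Caller == Callee; // remaining ABI-relevant attributes must match
}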
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4f3f798fe6f8..3e8e190eecc3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1647,8 +1647,18 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (IsFunction)
+ if (IsFunction) {
OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+ if (TM.getTargetTriple().isOSBinFormatCOFF()) {
+ OutStreamer->BeginCOFFSymbolDef(Name);
+ OutStreamer->EmitCOFFSymbolStorageClass(
+ GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer->EndCOFFSymbolDef();
+ }
+ }
emitVisibility(Name, GA.getVisibility());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 1a0256f30d41..396322c4979d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -314,8 +314,7 @@ unsigned DIE::computeOffsetsAndAbbrevs(const dwarf::FormParams &FormParams,
//===----------------------------------------------------------------------===//
// DIEUnit Implementation
//===----------------------------------------------------------------------===//
-DIEUnit::DIEUnit(dwarf::Tag UnitTag)
- : Die(UnitTag), Section(nullptr), Offset(0) {
+DIEUnit::DIEUnit(dwarf::Tag UnitTag) : Die(UnitTag) {
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_skeleton_unit ||
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index e36b7e2ae885..63343d2519f9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -33,8 +33,7 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {}
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A) {}
void DwarfCFIExceptionBase::markFunctionEnd() {
endFragment();
@@ -52,8 +51,7 @@ void DwarfCFIExceptionBase::endFragment() {
}
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
- forceEmitPersonality(false), shouldEmitLSDA(false) {}
+ : DwarfCFIExceptionBase(A) {}
DwarfCFIException::~DwarfCFIException() {}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 680b9586228f..609b568f28be 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3367,8 +3367,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Fast path if we're building some type units and one has already used the
// address pool, we know we're going to throw away all this work anyway, so
// don't bother building dependent types.
- if (!TypeUnitsUnderConstruction.empty() &&
- (AddrPool.hasBeenUsed() || SeenLocalType))
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
@@ -3379,7 +3378,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- SeenLocalType = false;
auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
@@ -3423,7 +3421,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// Types referencing entries in the address table cannot be placed in type
// units.
- if (AddrPool.hasBeenUsed() || SeenLocalType) {
+ if (AddrPool.hasBeenUsed()) {
// Remove all the types built while building this type.
// This is pessimistic as some of these types might not be dependent on
@@ -3451,18 +3449,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)),
- AddrPoolUsed(DD->AddrPool.hasBeenUsed()),
- SeenLocalType(DD->SeenLocalType) {
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
DD->AddrPool.resetUsedFlag();
- DD->SeenLocalType = false;
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
DD->AddrPool.resetUsedFlag(AddrPoolUsed);
- DD->SeenLocalType = SeenLocalType;
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0043000652e8..4e1a1b1e068d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -433,7 +433,6 @@ private:
DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
AddressPool AddrPool;
- bool SeenLocalType = false;
/// Accelerator tables.
AccelTable<DWARF5AccelTableData> AccelDebugNames;
@@ -672,7 +671,6 @@ public:
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
bool AddrPoolUsed;
- bool SeenLocalType;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
@@ -681,7 +679,6 @@ public:
};
NonTypeUnitContext enterNonTypeUnitContext();
- void seenLocalType() { SeenLocalType = true; }
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 4defa8a30855..e5cda4739fde 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -26,9 +26,9 @@ protected:
DwarfCFIExceptionBase(AsmPrinter *A);
/// Per-function flag to indicate if frame CFI info should be emitted.
- bool shouldEmitCFI;
+ bool shouldEmitCFI = false;
/// Per-module flag to indicate if .cfi_section has been emitted.
- bool hasEmittedCFISections;
+ bool hasEmittedCFISections = false;
void markFunctionEnd() override;
void endFragment() override;
@@ -36,13 +36,13 @@ protected:
class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if .cfi_personality should be emitted.
- bool shouldEmitPersonality;
+ bool shouldEmitPersonality = false;
/// Per-function flag to indicate if .cfi_personality must be emitted.
- bool forceEmitPersonality;
+ bool forceEmitPersonality = false;
/// Per-function flag to indicate if .cfi_lsda should be emitted.
- bool shouldEmitLSDA;
+ bool shouldEmitLSDA = false;
public:
//===--------------------------------------------------------------------===//
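Several hunks in this commit, including the DwarfException.h one above, are the same NFC modernization: flag defaults move from constructor init-lists to in-class default member initializers. A generic before/after sketch:

struct Before {
  bool ShouldEmit;
  bool HasEmitted;
  Before() : ShouldEmit(false), HasEmitted(false) {}
};

struct After {
  bool ShouldEmit = false; // defaults live next to the declarations,
  bool HasEmitted = false; // so new constructors can't forget them
  After() = default;
};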
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ee932d105107..fe438102ee98 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -287,9 +287,17 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// expression representing a value, rather than a location.
if ((!isParameterValue() && !isMemoryLocation() && !HasComplexExpression) ||
isEntryValue()) {
+ auto FragmentInfo = ExprCursor.getFragmentInfo();
+ unsigned RegSize = 0;
for (auto &Reg : DwarfRegs) {
+ RegSize += Reg.SubRegSize;
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
+ if (FragmentInfo)
+ if (RegSize > FragmentInfo->SizeInBits)
+ // If the register is larger than the current fragment, stop once
+ // the fragment is covered.
+ break;
addOpPiece(Reg.SubRegSize);
}
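The DwarfExpression change stops emitting piece operations once the accumulated register size covers the fragment. A standalone model of that loop; the piece representation and names are invented:

#include <cstdio>
#include <vector>

void emitPieces(const std::vector<unsigned> &SubRegSizes,
                unsigned FragmentSizeInBits) {
  unsigned Covered = 0;
  for (unsigned Size : SubRegSizes) {
    Covered += Size;
    if (Covered > FragmentSizeInBits)
      break; // register is larger than the fragment: skip the excess piece
    std::printf("DW_OP_piece %u\n", Size);
  }
}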
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 15d90c54adfc..5a2bd479f277 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -89,8 +89,7 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
- IndexTyDie(nullptr) {}
+ : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -597,8 +596,10 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else
+ else {
+ auto X = DD->enterNonTypeUnitContext();
finishNonUnitTypeDIE(TyDIE, CTy);
+ }
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
@@ -1852,23 +1853,5 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addString(D, dwarf::DW_AT_name, Name);
if (Name.startswith("_STN") || !Name.contains('<'))
addTemplateParams(D, CTy->getTemplateParams());
- // If the type is in an anonymous namespace, we can't reference it from a TU
- // (since the type would be CU local and the TU doesn't specify which TU has
- // the appropriate type definition) - so flag this emission as such and skip
- // the rest of the emission now since we're going to throw out all this work
- // and put the outer/referencing type in the CU instead.
- // FIXME: Probably good to generalize this to a DICompositeType flag populated
- // by the frontend, then we could use that to have types that can have
- // decl+def merged by LTO but where the definition still doesn't go in a type
- // unit because the type has only one definition.
- for (DIScope *S = CTy->getScope(); S; S = S->getScope()) {
- if (auto *NS = dyn_cast<DINamespace>(S)) {
- if (NS->getName().empty()) {
- DD->seenLocalType();
- break;
- }
- }
- }
- auto X = DD->enterNonTypeUnitContext();
getCU().createTypeDIE(CTy);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 330f3bacca43..48d63d126701 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -51,7 +51,7 @@ protected:
DwarfFile *DU;
/// An anonymous type for index type. Owned by DIEUnit.
- DIE *IndexTyDie;
+ DIE *IndexTyDie = nullptr;
/// Tracks the mapping of unit level debug information variables to debug
/// information entries.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 28f24e5ea908..c888adeafca5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3446,7 +3446,7 @@ private:
bool AllAddrModesTrivial = true;
/// Common Type for all different fields in addressing modes.
- Type *CommonType;
+ Type *CommonType = nullptr;
/// SimplifyQuery for simplifyInstruction utility.
const SimplifyQuery &SQ;
@@ -3456,7 +3456,7 @@ private:
public:
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
- : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
+ : SQ(_SQ), Original(OriginalValue) {}
/// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const {
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 0b5469b02637..6a0da4dad3c1 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -111,12 +111,11 @@ public:
/// Information about each phi in the Tail block.
struct PHIInfo {
MachineInstr *PHI;
- unsigned TReg, FReg;
+ unsigned TReg = 0, FReg = 0;
// Latencies from Cond+Branch, TReg, and FReg to DstReg.
- int CondCycles, TCycles, FCycles;
+ int CondCycles = 0, TCycles = 0, FCycles = 0;
- PHIInfo(MachineInstr *phi)
- : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ PHIInfo(MachineInstr *phi) : PHI(phi) {}
};
SmallVector<PHIInfo, 8> PHIs;
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index d0c2b8c267ff..60ee1812ee2c 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -70,8 +70,8 @@ class MemCmpExpansion {
CallInst *const CI;
ResultBlock ResBlock;
const uint64_t Size;
- unsigned MaxLoadSize;
- uint64_t NumLoadsNonOneByte;
+ unsigned MaxLoadSize = 0;
+ uint64_t NumLoadsNonOneByte = 0;
const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
BasicBlock *EndBlock;
@@ -219,8 +219,7 @@ MemCmpExpansion::MemCmpExpansion(
const TargetTransformInfo::MemCmpExpansionOptions &Options,
const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
DomTreeUpdater *DTU)
- : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
- NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
Builder(CI) {
assert(Size > 0 && "zero blocks");
diff --git a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
index 727d33fe4a40..6271a4514c27 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
@@ -64,7 +64,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, LegacyLegalizeAction Action) {
return OS;
}
-LegacyLegalizerInfo::LegacyLegalizerInfo() : TablesInitialized(false) {
+LegacyLegalizerInfo::LegacyLegalizerInfo() {
// Set defaults.
// FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the
// fundamental load/store Jakob proposed. Once loads & stores are supported.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 681e2f3dc848..1b20d1da20ad 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -1211,11 +1211,11 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
void IfConverter::AnalyzeBlock(
MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {
- BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {}
+ BBState(MachineBasicBlock &MBB) : MBB(&MBB) {}
MachineBasicBlock *MBB;
/// This flag is true if MBB's successors have been analyzed.
- bool SuccsAnalyzed;
+ bool SuccsAnalyzed = false;
};
// Push MBB to the stack.
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 2ee9379cb286..230c6846dde2 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -656,10 +656,10 @@ public:
};
/// Basic-block the load instructions are within
- BasicBlock *BB;
+ BasicBlock *BB = nullptr;
/// Pointer value of all participation load instructions
- Value *PV;
+ Value *PV = nullptr;
/// Participating load instructions
std::set<LoadInst *> LIs;
@@ -668,7 +668,7 @@ public:
std::set<Instruction *> Is;
/// Final shuffle-vector instruction
- ShuffleVectorInst *SVI;
+ ShuffleVectorInst *SVI = nullptr;
/// Information of the offset for each vector element
ElementInfo *EI;
@@ -676,8 +676,7 @@ public:
/// Vector Type
FixedVectorType *const VTy;
- VectorInfo(FixedVectorType *VTy)
- : BB(nullptr), PV(nullptr), SVI(nullptr), VTy(VTy) {
+ VectorInfo(FixedVectorType *VTy) : VTy(VTy) {
EI = new ElementInfo[VTy->getNumElements()];
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 8a190e769941..0eb6100230bd 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -274,6 +274,13 @@ public:
// Map of the preferred location for each value.
DenseMap<ValueIDNum, LocIdx> ValueToLoc;
+
+ // Initialize the preferred-location map with illegal locations, to be
+ // filled in later.
+ for (auto &VLoc : VLocs)
+ if (VLoc.second.Kind == DbgValue::Def)
+ ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()});
+
ActiveMLocs.reserve(VLocs.size());
ActiveVLocs.reserve(VLocs.size());
@@ -285,21 +292,20 @@ public:
ValueIDNum &VNum = MLocs[Idx.asU64()];
VarLocs.push_back(VNum);
- // Short-circuit unnecessary preferred location update.
- if (VLocs.empty())
+ // Is there a variable that wants a location for this value? If not, skip.
+ auto VIt = ValueToLoc.find(VNum);
+ if (VIt == ValueToLoc.end())
continue;
- auto it = ValueToLoc.find(VNum);
+ LocIdx CurLoc = VIt->second;
// In order of preference, pick:
// * Callee saved registers,
// * Other registers,
// * Spill slots.
- if (it == ValueToLoc.end() || MTracker->isSpill(it->second) ||
- (!isCalleeSaved(it->second) && isCalleeSaved(Idx.asU64()))) {
+ if (CurLoc.isIllegal() || MTracker->isSpill(CurLoc) ||
+ (!isCalleeSaved(CurLoc) && isCalleeSaved(Idx.asU64()))) {
// Insert, or overwrite if insertion failed.
- auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx));
- if (!PrefLocRes.second)
- PrefLocRes.first->second = Idx;
+ VIt->second = Idx;
}
}
@@ -314,7 +320,7 @@ public:
// If the value has no location, we can't make a variable location.
const ValueIDNum &Num = Var.second.ID;
auto ValuesPreferredLoc = ValueToLoc.find(Num);
- if (ValuesPreferredLoc == ValueToLoc.end()) {
+ if (ValuesPreferredLoc->second.isIllegal()) {
// If it's a def that occurs in this block, register it as a
// use-before-def to be resolved as we step through the block.
if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
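The TransferTracker hunks above replace insert-while-scanning with a pre-seeded map: every value a variable wants is first mapped to an illegal sentinel, so the scan over locations only updates existing entries. An illustrative standalone version (the real code also ranks callee-saved registers above spills):

#include <unordered_map>
#include <vector>

constexpr int kIllegalLoc = -1;

// ValueHeldAt[i] is the value resident in location i; Wanted lists the values
// some variable needs a location for.
std::unordered_map<int, int>
pickLocations(const std::vector<int> &Wanted,
              const std::vector<int> &ValueHeldAt) {
  std::unordered_map<int, int> ValueToLoc;
  for (int V : Wanted)                 // pre-seed with sentinels
    ValueToLoc.emplace(V, kIllegalLoc);
  for (int Loc = 0; Loc != (int)ValueHeldAt.size(); ++Loc) {
    auto It = ValueToLoc.find(ValueHeldAt[Loc]);
    if (It == ValueToLoc.end())
      continue;                        // nobody wants this value: skip early
    if (It->second == kIllegalLoc)     // first acceptable location wins here
      It->second = Loc;
  }
  return ValueToLoc;
}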
@@ -1374,18 +1380,20 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Look for any clobbers performed by a register mask. Only test locations
// that are actually being tracked.
- for (auto L : MTracker->locations()) {
- // Stack locations can't be clobbered by regmasks.
- if (MTracker->isSpill(L.Idx))
- continue;
+ if (!RegMaskPtrs.empty()) {
+ for (auto L : MTracker->locations()) {
+ // Stack locations can't be clobbered by regmasks.
+ if (MTracker->isSpill(L.Idx))
+ continue;
- Register Reg = MTracker->LocIdxToLocID[L.Idx];
- if (IgnoreSPAlias(Reg))
- continue;
+ Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
- for (auto *MO : RegMaskPtrs)
- if (MO->clobbersPhysReg(Reg))
- TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ for (auto *MO : RegMaskPtrs)
+ if (MO->clobbersPhysReg(Reg))
+ TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
+ }
}
// Tell TTracker about any folded stack store.
@@ -2212,40 +2220,6 @@ void InstrRefBasedLDV::buildMLocValueMap(
// redundant PHIs.
}
-// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
-// template specialisations for graph traits and a successor enumerator.
-namespace llvm {
-template <> struct GraphTraits<MachineBasicBlock> {
- using NodeRef = MachineBasicBlock *;
- using ChildIteratorType = MachineBasicBlock::succ_iterator;
-
- static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
- static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
-};
-
-template <> struct GraphTraits<const MachineBasicBlock> {
- using NodeRef = const MachineBasicBlock *;
- using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
-
- static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
- static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
-};
-
-using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
-using MachineDomTreeChildGetter =
- typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
-
-namespace IDFCalculatorDetail {
-template <>
-typename MachineDomTreeChildGetter::ChildrenTy
-MachineDomTreeChildGetter::get(const NodeRef &N) {
- return {N->succ_begin(), N->succ_end()};
-}
-} // namespace IDFCalculatorDetail
-} // namespace llvm
-
void InstrRefBasedLDV::BlockPHIPlacement(
const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
@@ -2253,8 +2227,7 @@ void InstrRefBasedLDV::BlockPHIPlacement(
// Apply IDF calculator to the designated set of location defs, storing
// required PHIs into PHIBlocks. Uses the dominator tree stored in the
// InstrRefBasedLDV object.
- IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo;
- IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo);
+ IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree->getBase());
IDF.setLiveInBlocks(AllBlocks);
IDF.setDefiningBlocks(DefBlocks);
@@ -2465,8 +2438,71 @@ bool InstrRefBasedLDV::vlocJoin(
}
}
-void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+void InstrRefBasedLDV::getBlocksForScope(
+ const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &BlocksToExplore,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks) {
+ // Get the set of "normal" in-lexical-scope blocks.
+ LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
+
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+ // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+
+ // Storage for artificial blocks we intend to add to BlocksToExplore.
+ DenseSet<const MachineBasicBlock *> ToAdd;
+
+ // To avoid needlessly dropping large volumes of variable locations, propagate
+ // variables through aritifical blocks, i.e. those that don't have any
+ // instructions in scope at all. To accurately replicate VarLoc
+ // LiveDebugValues, this means exploring all artificial successors too.
+ // Perform a depth-first-search to enumerate those blocks.
+ for (auto *MBB : BlocksToExplore) {
+ // Depth-first-search state: each node is a block and which successor
+ // we're currently exploring.
+ SmallVector<std::pair<const MachineBasicBlock *,
+ MachineBasicBlock::const_succ_iterator>,
+ 8>
+ DFS;
+
+ // Find any artificial successors not already tracked.
+ for (auto *succ : MBB->successors()) {
+ if (BlocksToExplore.count(succ))
+ continue;
+ if (!ArtificialBlocks.count(succ))
+ continue;
+ ToAdd.insert(succ);
+ DFS.push_back({succ, succ->succ_begin()});
+ }
+
+ // Search all those blocks, depth first.
+ while (!DFS.empty()) {
+ const MachineBasicBlock *CurBB = DFS.back().first;
+ MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
+ // Walk back if we've explored this block's successors to the end.
+ if (CurSucc == CurBB->succ_end()) {
+ DFS.pop_back();
+ continue;
+ }
+
+ // If the current successor is artificial and unexplored, descend into
+ // it.
+ if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
+ ToAdd.insert(*CurSucc);
+ DFS.push_back({*CurSucc, (*CurSucc)->succ_begin()});
+ continue;
+ }
+
+ ++CurSucc;
+ }
+ };
+
+ BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
+}
+
+void InstrRefBasedLDV::buildVLocValueMap(
+ const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
@@ -2490,74 +2526,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
return BBToOrder[A] < BBToOrder[B];
};
- LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
-
- // A separate container to distinguish "blocks we're exploring" versus
- // "blocks that are potentially in scope. See comment at start of vlocJoin.
- SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
-
- // VarLoc LiveDebugValues tracks variable locations that are defined in
- // blocks not in scope. This is something we could legitimately ignore, but
- // lets allow it for now for the sake of coverage.
- BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
-
- // We also need to propagate variable values through any artificial blocks
- // that immediately follow blocks in scope.
- DenseSet<const MachineBasicBlock *> ToAdd;
-
- // Helper lambda: For a given block in scope, perform a depth first search
- // of all the artificial successors, adding them to the ToAdd collection.
- auto AccumulateArtificialBlocks =
- [this, &ToAdd, &BlocksToExplore,
- &InScopeBlocks](const MachineBasicBlock *MBB) {
- // Depth-first-search state: each node is a block and which successor
- // we're currently exploring.
- SmallVector<std::pair<const MachineBasicBlock *,
- MachineBasicBlock::const_succ_iterator>,
- 8>
- DFS;
-
- // Find any artificial successors not already tracked.
- for (auto *succ : MBB->successors()) {
- if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ))
- continue;
- if (!ArtificialBlocks.count(succ))
- continue;
- ToAdd.insert(succ);
- DFS.push_back(std::make_pair(succ, succ->succ_begin()));
- }
-
- // Search all those blocks, depth first.
- while (!DFS.empty()) {
- const MachineBasicBlock *CurBB = DFS.back().first;
- MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
- // Walk back if we've explored this blocks successors to the end.
- if (CurSucc == CurBB->succ_end()) {
- DFS.pop_back();
- continue;
- }
-
- // If the current successor is artificial and unexplored, descend into
- // it.
- if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
- ToAdd.insert(*CurSucc);
- DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
- continue;
- }
-
- ++CurSucc;
- }
- };
-
- // Search in-scope blocks and those containing a DBG_VALUE from this scope
- // for artificial successors.
- for (auto *MBB : BlocksToExplore)
- AccumulateArtificialBlocks(MBB);
- for (auto *MBB : InScopeBlocks)
- AccumulateArtificialBlocks(MBB);
-
- BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
- InScopeBlocks.insert(ToAdd.begin(), ToAdd.end());
+ getBlocksForScope(DILoc, BlocksToExplore, AssignBlocks);
// Single block scope: not interesting! No propagation at all. Note that
// this could probably go above ArtificialBlocks without damage, but
@@ -2628,7 +2597,15 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
SmallVector<MachineBasicBlock *, 32> PHIBlocks;
- // Request the set of PHIs we should insert for this variable.
+ // Request the set of PHIs we should insert for this variable. If there's
+ // only one value definition, things are very simple.
+ if (DefBlocks.size() == 1) {
+ placePHIsForSingleVarDefinition(MutBlocksToExplore, *DefBlocks.begin(),
+ AllTheVLocs, Var, Output);
+ continue;
+ }
+
+ // Otherwise: we need to place PHIs through SSA and propagate values.
BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
// Insert PHIs into the per-block live-in tables for this variable.
@@ -2769,6 +2746,39 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
BlocksToExplore.clear();
}
+void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *AssignMBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output) {
+ // If there is a single definition of the variable, then working out its
+ // value everywhere is simple: it holds in every block dominated by the
+ // definition. At the dominance frontier, the usual algorithm would:
+ // * Place PHIs,
+ // * Propagate values into them,
+ // * Find there's no incoming variable value from the other incoming branches
+ // of the dominance frontier,
+ // * Specify there's no variable value in blocks past the frontier.
+ // This is a common case, so it's worth special-casing.
+
+ // Pick out the variable's value from the block transfer function.
+ VLocTracker &VLocs = AllTheVLocs[AssignMBB->getNumber()];
+ auto ValueIt = VLocs.Vars.find(Var);
+ const DbgValue &Value = ValueIt->second;
+
+ // Assign the variable value on entry to each dominated block that's in scope.
+ // Skip the definition block -- it's assigned the variable value in the middle
+ // of the block somewhere.
+ for (auto *ScopeBlock : InScopeBlocks) {
+ if (!DomTree->properlyDominates(AssignMBB, ScopeBlock))
+ continue;
+
+ Output[ScopeBlock->getNumber()].push_back({Var, Value});
+ }
+
+ // All blocks that aren't dominated have no live-in value, thus no variable
+ // value will be given to them.
+}
+
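The fast path above leans entirely on properlyDominates. As a reminder of what that query does, a toy implementation over a dominator tree stored as an immediate-dominator array (LLVM of course uses MachineDominatorTree):

#include <vector>

bool properlyDominates(int A, int B, const std::vector<int> &IDom) {
  // Walk B's immediate-dominator chain; A properly dominates B iff we meet A
  // strictly above B. The root is its own immediate dominator.
  while (B != IDom[B]) {
    B = IDom[B];
    if (B == A)
      return true;
  }
  return false;
}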
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void InstrRefBasedLDV::dump_mloc_transfer(
const MLocTransferMap &mloc_transfer) const {
@@ -2806,39 +2816,7 @@ void InstrRefBasedLDV::emitLocations(
}
}
- // Go through all the transfers recorded in the TransferTracker -- this is
- // both the live-ins to a block, and any movements of values that happen
- // in the middle.
- for (const auto &P : TTracker->Transfers) {
- // We have to insert DBG_VALUEs in a consistent order, otherwise they
- // appear in DWARF in different orders. Use the order that they appear
- // when walking through each block / each instruction, stored in
- // AllVarsNumbering.
- SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
- for (MachineInstr *MI : P.Insts) {
- DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
- MI->getDebugLoc()->getInlinedAt());
- Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
- }
- llvm::sort(Insts,
- [](const auto &A, const auto &B) { return A.first < B.first; });
-
- // Insert either before or after the designated point...
- if (P.MBB) {
- MachineBasicBlock &MBB = *P.MBB;
- for (const auto &Pair : Insts)
- MBB.insert(P.Pos, Pair.second);
- } else {
- // Terminators, like tail calls, can clobber things. Don't try and place
- // transfers after them.
- if (P.Pos->isTerminator())
- continue;
-
- MachineBasicBlock &MBB = *P.Pos->getParent();
- for (const auto &Pair : Insts)
- MBB.insertAfterBundle(P.Pos, Pair.second);
- }
- }
+ emitTransfers(AllVarsNumbering);
}
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
@@ -2883,6 +2861,45 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
#endif
}
+bool InstrRefBasedLDV::emitTransfers(
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ // Go through all the transfers recorded in the TransferTracker -- this is
+ // both the live-ins to a block, and any movements of values that happen
+ // in the middle.
+ for (const auto &P : TTracker->Transfers) {
+ // We have to insert DBG_VALUEs in a consistent order, otherwise they
+ // appear in DWARF in different orders. Use the order that they appear
+ // when walking through each block / each instruction, stored in
+ // AllVarsNumbering.
+ SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
+ for (MachineInstr *MI : P.Insts) {
+ DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
+ }
+ llvm::sort(Insts,
+ [](const auto &A, const auto &B) { return A.first < B.first; });
+
+ // Insert either before or after the designated point...
+ if (P.MBB) {
+ MachineBasicBlock &MBB = *P.MBB;
+ for (const auto &Pair : Insts)
+ MBB.insert(P.Pos, Pair.second);
+ } else {
+ // Terminators, like tail calls, can clobber things. Don't try and place
+ // transfers after them.
+ if (P.Pos->isTerminator())
+ continue;
+
+ MachineBasicBlock &MBB = *P.Pos->getParent();
+ for (const auto &Pair : Insts)
+ MBB.insertAfterBundle(P.Pos, Pair.second);
+ }
+ }
+
+ return TTracker->Transfers.size() != 0;
+}
+
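emitTransfers keeps DBG_VALUE emission deterministic by sorting pending instructions by their AllVarsNumbering index before insertion. Stripped of MachineInstr, the ordering step is just a keyed sort:

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

void orderForEmission(std::vector<std::pair<unsigned, std::string>> &Insts) {
  // Pair: (AllVarsNumbering index, textual stand-in for a DBG_VALUE).
  std::sort(Insts.begin(), Insts.end(),
            [](const auto &A, const auto &B) { return A.first < B.first; });
}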
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
@@ -2989,14 +3006,14 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
DenseMap<DebugVariable, unsigned> AllVarsNumbering;
// Map from one LexicalScope to all the variables in that scope.
- DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars;
+ ScopeToVarsT ScopeToVars;
- // Map from One lexical scope to all blocks in that scope.
- DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>
- ScopeToBlocks;
+ // Map from one lexical scope to all blocks where assignments happen for
+ // that scope.
+ ScopeToAssignBlocksT ScopeToAssignBlocks;
- // Store a DILocation that describes a scope.
- DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation;
+ // Store a map of DILocations that describe scopes.
+ ScopeToDILocT ScopeToDILocation;
// To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
// the order is unimportant, it just has to be stable.
@@ -3016,7 +3033,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
ScopeToVars[Scope].insert(Var);
- ScopeToBlocks[Scope].insert(VTracker->MBB);
+ ScopeToAssignBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
++VarAssignCount;
}
@@ -3040,7 +3057,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// a map of variables to values in SavedLiveIns.
for (auto &P : ScopeToVars) {
buildVLocValueMap(ScopeToDILocation[P.first], P.second,
- ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
vlocs);
}
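The successor walk shared by getBlocksForScope and the removed lambda is an iterative DFS with an explicit (node, next-successor) stack. The same shape, standalone, over an adjacency list; IsArtificial is an assumed predicate:

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

void collectArtificialSuccessors(const std::vector<std::vector<int>> &Succs,
                                 const std::vector<bool> &IsArtificial,
                                 int Start, std::set<int> &ToAdd) {
  std::vector<std::pair<int, size_t>> DFS; // (block, index of next successor)
  DFS.push_back({Start, 0});
  while (!DFS.empty()) {
    auto &[CurBB, NextIdx] = DFS.back();
    if (NextIdx == Succs[CurBB].size()) { // all successors explored: walk back
      DFS.pop_back();
      continue;
    }
    int Succ = Succs[CurBB][NextIdx++];
    if (!ToAdd.count(Succ) && IsArtificial[Succ]) {
      ToAdd.insert(Succ);
      DFS.push_back({Succ, 0}); // descend into unexplored artificial block
    }
  }
}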
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 9e9c0ce394fd..e7383209c027 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -779,6 +779,17 @@ public:
/// Used as the result type for the variable value dataflow problem.
using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+ /// Mapping from lexical scopes to a DILocation in that scope.
+ using ScopeToDILocT = DenseMap<const LexicalScope *, const DILocation *>;
+
+ /// Mapping from lexical scopes to variables in that scope.
+ using ScopeToVarsT = DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>>;
+
+ /// Mapping from lexical scopes to blocks where variables in that scope are
+ /// assigned. Such blocks aren't necessarily "in" the lexical scope; they're
+ /// just blocks where an assignment happens.
+ using ScopeToAssignBlocksT = DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>;
+
private:
MachineDominatorTree *DomTree;
const TargetRegisterInfo *TRI;
@@ -816,7 +827,7 @@ private:
/// Blocks which are artificial, i.e. blocks which exclusively contain
/// instructions without DebugLocs, or with line 0 locations.
- SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+ SmallPtrSet<MachineBasicBlock *, 16> ArtificialBlocks;
// Mapping of blocks to and from their RPOT order.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
@@ -958,6 +969,15 @@ private:
ValueIDNum **MInLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+ /// Propagate variable values to blocks in the common case where there's
+ /// only one value assigned to the variable. This function has better
+ /// performance as it doesn't have to find the dominance frontier between
+ /// different assignments.
+ void placePHIsForSingleVarDefinition(
+ const SmallPtrSetImpl<MachineBasicBlock *> &InScopeBlocks,
+ MachineBasicBlock *MBB, SmallVectorImpl<VLocTracker> &AllTheVLocs,
+ const DebugVariable &Var, LiveInsT &Output);
+
/// Calculate the iterated-dominance-frontier for a set of defs, using the
/// existing LLVM facilities for this. Works for a single "value" or
/// machine/variable location.
@@ -979,6 +999,19 @@ private:
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
ValueIDNum **OutLocs, ValueIDNum *InLocs);
+ /// Produce a set of blocks that are in the current lexical scope. This means
+ /// those blocks that contain instructions "in" the scope, blocks where
+ /// assignments to variables in scope occur, and artificial blocks that are
+ /// successors to any of the earlier blocks. See https://llvm.org/PR48091 for
+ /// more commentary on what "in scope" means.
+ /// \p DILoc A location in the scope that we're fetching blocks for.
+ /// \p Output Set to put in-scope-blocks into.
+ /// \p AssignBlocks Blocks known to contain assignments of variables in scope.
+ void
+ getBlocksForScope(const DILocation *DILoc,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Output,
+ const SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks);
+
/// Solve the variable value dataflow problem, for a single lexical scope.
/// Uses the algorithm from the file comment to resolve control flow joins
/// using PHI placement and value propagation. Reads the locations of machine
@@ -1029,6 +1062,12 @@ private:
DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
const TargetPassConfig &TPC);
+ /// Take collections of DBG_VALUE instructions stored in TTracker, and
+ /// install them into their output blocks. Preserves a stable order of
+ /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through
+ /// the AllVarsNumbering order.
+ bool emitTransfers(DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+
+ /// Boilerplate computation of some initial sets, artificial blocks and
/// RPOT block ordering.
void initialSetup(MachineFunction &MF);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index b4dd41bbb810..42a0967bce3f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -329,7 +329,7 @@ private:
EntryValueKind,
EntryValueBackupKind,
EntryValueCopyBackupKind
- } EVKind;
+ } EVKind = EntryValueLocKind::NonEntryValueKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
@@ -397,8 +397,7 @@ private:
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
- Expr(MI.getDebugExpression()), MI(MI),
- EVKind(EntryValueLocKind::NonEntryValueKind) {
+ Expr(MI.getDebugExpression()), MI(MI) {
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert((MI.isDebugValueList() || MI.getNumOperands() == 4) &&
"malformed DBG_VALUE");
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index a74c57690640..33782c755eb0 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -220,6 +220,19 @@ void resetInputs(MLModelRunner &Runner) {
#undef _RESET
}
+// Per-live interval components that get aggregated into the feature values that
+// will be passed to the evaluator.
+struct LIFeatureComponents {
+ double R = 0;
+ double W = 0;
+ double RW = 0;
+ double IndVarUpdates = 0;
+ double HintWeights = 0.0;
+ int64_t NrDefsAndUses = 0;
+ float HottestBlockFreq = 0.0;
+ bool IsRemat = false;
+};
+
using CandidateRegList =
std::array<std::pair<MCRegister, bool>, NumberOfInterferences>;
using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
@@ -227,8 +240,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
public:
- MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
- MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
+ MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner,
+ const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops);
protected:
@@ -277,6 +290,9 @@ private:
FixedRegisters);
}
+ const LIFeatureComponents
+ getLIFeatureComponents(const LiveInterval &LI) const;
+
// Hold on to a default advisor for:
// 1) the implementation of canEvictHintInterference, because we didn't learn
// that nuance yet;
@@ -319,7 +335,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
if (!Runner)
Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
MF.getFunction().getContext(), FeatureNames, DecisionName);
@@ -364,7 +380,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
- DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops, Logger *Log)
@@ -420,7 +436,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
LLVMContext &Ctx = MF.getFunction().getContext();
if (ModelUnderTraining.empty() && TrainingLog.empty()) {
Ctx.emitError("Regalloc development mode should be requested with at "
@@ -480,7 +496,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
return Ret;
}
-MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops)
@@ -615,16 +631,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
++I, ++Pos) {
MCRegister PhysReg = *I;
- Regs[Pos] = std::make_pair(PhysReg, true);
+ assert(!Regs[Pos].second);
assert(PhysReg);
if (!canAllocatePhysReg(CostPerUseLimit, PhysReg)) {
- Regs[Pos].second = false;
continue;
}
if (loadInterferenceFeatures(VirtReg, PhysReg, I.isHint(), FixedRegisters,
Largest, Pos)) {
++Available;
- Regs[Pos].second = true;
+ Regs[Pos] = std::make_pair(PhysReg, true);
}
}
if (Available == 0) {
@@ -632,6 +647,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
assert(!MustFindEviction);
return MCRegister::NoRegister;
}
+ const size_t ValidPosLimit = Pos;
// If we must find eviction, the candidate should be masked out of the
// decision making process.
Regs[CandidateVirtRegPos].second = !MustFindEviction;
@@ -665,9 +681,55 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
assert(!MustFindEviction);
return MCRegister::NoRegister;
}
+ assert(CandidatePos < ValidPosLimit);
+ (void)ValidPosLimit;
return Regs[CandidatePos].first;
}
+const LIFeatureComponents
+MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
+ LIFeatureComponents Ret;
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ I = MRI->reg_instr_nodbg_begin(LI.reg()),
+ E = MRI->reg_instr_nodbg_end();
+ I != E;) {
+ MachineInstr *MI = &*(I++);
+
+ ++Ret.NrDefsAndUses;
+ if (!Visited.insert(MI).second)
+ continue;
+
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
+ continue;
+
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+
+ float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
+ Ret.HottestBlockFreq = std::max(Freq, Ret.HottestBlockFreq);
+
+ Ret.R += (Reads && !Writes) * Freq;
+ Ret.W += (!Reads && Writes) * Freq;
+ Ret.RW += (Reads && Writes) * Freq;
+
+ auto *MBB = MI->getParent();
+ auto *Loop = Loops.getLoopFor(MBB);
+ bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
+
+ if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
+ Ret.IndVarUpdates += Freq;
+
+ if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
+ Ret.HintWeights += Freq;
+ }
+ Ret.IsRemat = VirtRegAuxInfo::isRematerializable(
+ LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ return Ret;
+}
+
// Overall, this currently mimics what we do for weight calculation, but instead
+// of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
@@ -676,11 +738,11 @@ void MLEvictAdvisor::extractFeatures(
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
int64_t NrDefsAndUses = 0;
int64_t NrBrokenHints = 0;
- float R = 0;
- float W = 0;
- float RW = 0;
- float IndVarUpdates = 0;
- float HintWeights = 0.0;
+ double R = 0.0;
+ double W = 0.0;
+ double RW = 0.0;
+ double IndVarUpdates = 0.0;
+ double HintWeights = 0.0;
float StartBBFreq = 0.0;
float EndBBFreq = 0.0;
float HottestBlockFreq = 0.0;
@@ -707,46 +769,19 @@ void MLEvictAdvisor::extractFeatures(
if (LI.endIndex() > EndSI)
EndSI = LI.endIndex();
-
- SmallPtrSet<MachineInstr *, 8> Visited;
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const LIFeatureComponents LIFC = getLIFeatureComponents(LI);
NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
- for (MachineRegisterInfo::reg_instr_nodbg_iterator
- I = MRI->reg_instr_nodbg_begin(LI.reg()),
- E = MRI->reg_instr_nodbg_end();
- I != E;) {
- MachineInstr *MI = &*(I++);
+ NrDefsAndUses += LIFC.NrDefsAndUses;
+ HottestBlockFreq = std::max(HottestBlockFreq, LIFC.HottestBlockFreq);
+ R += LIFC.R;
+ W += LIFC.W;
+ RW += LIFC.RW;
- ++NrDefsAndUses;
- if (!Visited.insert(MI).second)
- continue;
+ IndVarUpdates += LIFC.IndVarUpdates;
- if (MI->isIdentityCopy() || MI->isImplicitDef())
- continue;
-
- bool Reads, Writes;
- std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
-
- float Freq = MBFI.getBlockFreqRelativeToEntryBlock(MI->getParent());
- if (Freq > HottestBlockFreq)
- HottestBlockFreq = Freq;
- R += (Reads && !Writes) * Freq;
- W += (!Reads && Writes) * Freq;
- RW += (Reads && Writes) * Freq;
-
- auto *MBB = MI->getParent();
- auto *Loop = Loops.getLoopFor(MBB);
- bool IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
-
- if (Writes && IsExiting && LIS->isLiveOutOfMBB(LI, MBB))
- IndVarUpdates += Freq;
-
- if (MI->isCopy() && VirtRegAuxInfo::copyHint(MI, LI.reg(), TRI, *MRI))
- HintWeights += Freq;
- }
- NrRematerializable += VirtRegAuxInfo::isRematerializable(
- LI, *LIS, *VRM, *MF.getSubtarget().getInstrInfo());
+ HintWeights += LIFC.HintWeights;
+ NrRematerializable += LIFC.IsRemat;
}
size_t Size = 0;
if (!Intervals.empty()) {
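The MLRegallocEvictAdvisor refactor splits feature extraction in two: a per-live-interval computation returning an LIFeatureComponents, and an aggregation loop that sums or maxes the components. The shape of that split, with invented field and function names:

#include <algorithm>
#include <vector>

struct Components {       // computed once per item
  double R = 0, W = 0;
  float Hottest = 0.0f;
};

Components computeComponents(int Item) {
  // ...the per-item analysis would live here; elided in this sketch.
  return {double(Item), double(Item) * 0.5, float(Item)};
}

Components aggregate(const std::vector<int> &Items) {
  Components Total;
  for (int I : Items) {
    Components C = computeComponents(I);
    Total.R += C.R;      // sums for frequency-weighted counters
    Total.W += C.W;
    Total.Hottest = std::max(Total.Hottest, C.Hottest); // max for peaks
  }
  return Total;
}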
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 50cbb14e926e..31d4fc7d02bf 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -400,12 +400,14 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
// FIXME: Do this for new pass manager.
LLVMContext &Ctx = M.getContext();
MMI.getContext().setDiagnosticHandler(
- [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr,
- std::vector<const MDNode *> &LocInfos) {
+ [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm,
+ const SourceMgr &SrcMgr,
+ std::vector<const MDNode *> &LocInfos) {
unsigned LocCookie = 0;
if (IsInlineAsm)
LocCookie = getLocCookie(SMD, SrcMgr, LocInfos);
- Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie));
+ Ctx.diagnose(
+ DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
return false;
diff --git a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
index e4da179efcc4..aa63411df965 100644
--- a/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
+++ b/llvm/lib/CodeGen/MachineModuleSlotTracker.cpp
@@ -66,8 +66,7 @@ MachineModuleSlotTracker::MachineModuleSlotTracker(
const MachineFunction *MF, bool ShouldInitializeAllMetadata)
: ModuleSlotTracker(MF->getFunction().getParent(),
ShouldInitializeAllMetadata),
- TheFunction(MF->getFunction()), TheMMI(MF->getMMI()), MDNStartSlot(0),
- MDNEndSlot(0) {
+ TheFunction(MF->getFunction()), TheMMI(MF->getMMI()) {
setProcessHook([this](AbstractSlotTrackerStorage *AST, const Module *M,
bool ShouldInitializeAllMetadata) {
this->processMachineModule(AST, M, ShouldInitializeAllMetadata);
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 19bf87d3e290..1a4ad53ddf81 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -43,8 +43,7 @@ void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
: MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness),
- IsUpdatedCSRsInitialized(false) {
+ EnableSubRegLiveness) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 005d4ad1a328..c9d3e473062b 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1909,7 +1909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const Register Reg = MO->getReg();
if (!Reg)
return;
- if (MRI->tracksLiveness() && !MI->isDebugValue())
+ if (MRI->tracksLiveness() && !MI->isDebugInstr())
checkLiveness(MO, MONum);
// Verify the consistency of tied operands.
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index d7cd0a583cee..aac46cb22084 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -139,7 +139,7 @@ namespace {
///
/// This is the instruction number from the top of the current block, not
/// the SlotIndex. It is only used by the AntiDepBreaker.
- unsigned EndIndex;
+ unsigned EndIndex = 0;
public:
SchedulePostRATDList(
@@ -206,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList(
const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
const InstrItineraryData *InstrItins =
MF.getSubtarget().getInstrItineraryData();
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 87df7bb4a689..fc5d1104a999 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
- "regalloc-enable-advisor", cl::Hidden,
+ "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore,
cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
cl::desc("Enable regalloc advisor mode"),
cl::values(
@@ -66,7 +66,7 @@ public:
private:
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
}
bool doInitialization(Module &M) override {
@@ -113,7 +113,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
llvm_unreachable("Unknown advisor kind");
}
-RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
const RAGreedy &RA)
: MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
@@ -122,3 +122,178 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
EnableLocalReassign(EnableLocalReassignment ||
MF.getSubtarget().enableRALocalReassignment(
MF.getTarget().getOptLevel())) {}
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B,
+ bool BreaksHint) const {
+ bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ if (A.weight() > B.weight()) {
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
+ return true;
+ }
+ return false;
+}
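
The policy above is small enough to restate as a pure function; the sketch below mirrors its two rules (hint-chasing while the evictee can still be split, otherwise a weight comparison), with plain floats standing in for LiveInterval:

    #include <iostream>

    static bool shouldEvictSketch(float WeightA, float WeightB, bool IsHint,
                                  bool BreaksHint, bool EvicteeCanSplit) {
      if (EvicteeCanSplit && IsHint && !BreaksHint)
        return true;            // hint-driven eviction, evictee can still split
      return WeightA > WeightB; // otherwise a straight weight comparison
    }

    int main() {
      // A lighter range still evicts a heavier one while chasing its hint:
      std::cout << shouldEvictSketch(1.0f, 2.0f, /*IsHint=*/true,
                                     /*BreaksHint=*/false,
                                     /*EvicteeCanSplit=*/true)
                << '\n'; // prints 1
    }
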
+
+/// canEvictHintInterference - Return true if the interference for VirtReg
+/// on PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool DefaultEvictionAdvisor::canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
+ LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
+
+ EvictionCost Cost;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    // If there are 10 or more interferences, chances are one is heavier.
+ const auto &Interferences = Q.interferingVRegs(10);
+ if (Interferences.size() >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (LiveInterval *Intf : reverse(Interferences)) {
+ assert(Register::isVirtualRegister(Intf->reg()) &&
+ "Only expecting virtual register interference from query");
+
+ // Do not allow eviction of a virtual register if we are in the middle
+ // of last-chance recoloring and this virtual register is one that we
+ // have scavenged a physical register for.
+ if (FixedRegisters.count(Intf->reg()))
+ return false;
+
+      // Never evict spill products. They cannot be split or spilled.
+ if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ if (Urgent)
+ continue;
+ // Apply the eviction policy for non-urgent evictions.
+ if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ // If !MaxCost.isMax(), then we're just looking for a cheap register.
+ // Evicting another local live range in this case could lead to suboptimal
+ // coloring.
+ if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
+ return false;
+ }
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
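
The loop's `!(Cost < MaxCost)` test relies on a lexicographic ordering in which a broken hint outweighs any spill-weight difference. A standalone sketch of that ordering, assuming the two-field (BrokenHints, MaxWeight) layout that RAGreedy's EvictionCost uses:

    #include <iostream>
    #include <tuple>

    struct EvictionCostSketch {
      unsigned BrokenHints = 0;
      float MaxWeight = 0.0f;

      bool operator<(const EvictionCostSketch &O) const {
        return std::tie(BrokenHints, MaxWeight) <
               std::tie(O.BrokenHints, O.MaxWeight);
      }
    };

    int main() {
      EvictionCostSketch Heavy{0, 5.0f};  // heavy interference, no hints broken
      EvictionCostSketch Hinted{1, 0.5f}; // light, but breaks a hint
      std::cout << (Heavy < Hinted) << '\n'; // prints 1: hints dominate weight
    }
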
+
+MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost;
+ BestCost.setMax();
+ MCRegister BestPhys;
+ auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
+ if (!MaybeOrderLimit)
+ return MCRegister::NoRegister;
+ unsigned OrderLimit = *MaybeOrderLimit;
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < uint8_t(~0u)) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight();
+ }
+
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I) {
+ MCRegister PhysReg = *I;
+ assert(PhysReg);
+ if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
+ !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (I.isHint())
+ break;
+ }
+ return BestPhys;
+}
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 33e03aed81a7..1f40386db8da 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -115,7 +115,7 @@ public:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
protected:
- RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
+ RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
@@ -173,7 +173,7 @@ public:
/// Get an advisor for the given context (i.e. machine function, etc)
virtual std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
+ getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
protected:
@@ -200,7 +200,7 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
// out of RegAllocGreedy.cpp
class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
public:
- DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
+ DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA)
: RegAllocEvictionAdvisor(MF, RA) {}
private:
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6ea6dbcbbb74..7870574df5b2 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -440,143 +440,6 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
return PhysReg;
}
-/// shouldEvict - determine if A should evict the assigned live range B. The
-/// eviction policy defined by this function together with the allocation order
-/// defined by enqueue() decides which registers ultimately end up being split
-/// and spilled.
-///
-/// Cascade numbers are used to prevent infinite loops if this function is a
-/// cyclic relation.
-///
-/// @param A The live range to be assigned.
-/// @param IsHint True when A is about to be assigned to its preferred
-/// register.
-/// @param B The live range to be evicted.
-/// @param BreaksHint True when B is already assigned to its preferred register.
-bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B,
- bool BreaksHint) const {
- bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
-
- // Be fairly aggressive about following hints as long as the evictee can be
- // split.
- if (CanSplit && IsHint && !BreaksHint)
- return true;
-
- if (A.weight() > B.weight()) {
- LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
- return true;
- }
- return false;
-}
-
-/// canEvictHintInterference - return true if the interference for VirtReg
-/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
-bool DefaultEvictionAdvisor::canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
- const SmallVirtRegSet &FixedRegisters) const {
- EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
- return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
- FixedRegisters);
-}
-
-/// canEvictInterferenceBasedOnCost - Return true if all interferences between
-/// VirtReg and PhysReg can be evicted.
-///
-/// @param VirtReg Live range that is about to be assigned.
-/// @param PhysReg Desired register for assignment.
-/// @param IsHint True when PhysReg is VirtReg's preferred register.
-/// @param MaxCost Only look for cheaper candidates and update with new cost
-/// when returning true.
-/// @returns True when interference can be evicted cheaper than MaxCost.
-bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
- EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
- // It is only possible to evict virtual register interference.
- if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
- return false;
-
- bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
-
- // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
- // involved in an eviction before. If a cascade number was assigned, deny
- // evicting anything with the same or a newer cascade number. This prevents
- // infinite eviction loops.
- //
- // This works out so a register without a cascade number is allowed to evict
- // anything, and it can be evicted by anything.
- unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
-
- EvictionCost Cost;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- // If there is 10 or more interferences, chances are one is heavier.
- const auto &Interferences = Q.interferingVRegs(10);
- if (Interferences.size() >= 10)
- return false;
-
- // Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Interferences)) {
- assert(Register::isVirtualRegister(Intf->reg()) &&
- "Only expecting virtual register interference from query");
-
- // Do not allow eviction of a virtual register if we are in the middle
- // of last-chance recoloring and this virtual register is one that we
- // have scavenged a physical register for.
- if (FixedRegisters.count(Intf->reg()))
- return false;
-
- // Never evict spill products. They cannot split or spill.
- if (RA.getExtraInfo().getStage(*Intf) == RS_Done)
- return false;
- // Once a live range becomes small enough, it is urgent that we find a
- // register for it. This is indicated by an infinite spill weight. These
- // urgent live ranges get to evict almost anything.
- //
- // Also allow urgent evictions of unspillable ranges from a strictly
- // larger allocation order.
- bool Urgent =
- !VirtReg.isSpillable() &&
- (Intf->isSpillable() ||
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
- RegClassInfo.getNumAllocatableRegs(
- MRI->getRegClass(Intf->reg())));
- // Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
- if (Cascade <= IntfCascade) {
- if (!Urgent)
- return false;
- // We permit breaking cascades for urgent evictions. It should be the
- // last resort, though, so make it really expensive.
- Cost.BrokenHints += 10;
- }
- // Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
- // Update eviction cost.
- Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
- // Abort if this would be too expensive.
- if (!(Cost < MaxCost))
- return false;
- if (Urgent)
- continue;
- // Apply the eviction policy for non-urgent evictions.
- if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
- return false;
- // If !MaxCost.isMax(), then we're just looking for a cheap register.
- // Evicting another local live range in this case could lead to suboptimal
- // coloring.
- if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
- (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
- return false;
- }
- }
- }
- MaxCost = Cost;
- return true;
-}
-
/// Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
@@ -757,44 +620,6 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
return true;
}
-MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
- // Keep track of the cheapest interference seen so far.
- EvictionCost BestCost;
- BestCost.setMax();
- MCRegister BestPhys;
- auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
- if (!MaybeOrderLimit)
- return MCRegister::NoRegister;
- unsigned OrderLimit = *MaybeOrderLimit;
-
- // When we are just looking for a reduced cost per use, don't break any
- // hints, and only evict smaller spill weights.
- if (CostPerUseLimit < uint8_t(~0u)) {
- BestCost.BrokenHints = 0;
- BestCost.MaxWeight = VirtReg.weight();
- }
-
- for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
- ++I) {
- MCRegister PhysReg = *I;
- assert(PhysReg);
- if (!canAllocatePhysReg(CostPerUseLimit, PhysReg) ||
- !canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
- FixedRegisters))
- continue;
-
- // Best so far.
- BestPhys = PhysReg;
-
- // Stop if the hint can be used.
- if (I.isHint())
- break;
- }
- return BestPhys;
-}
-
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
@@ -2922,6 +2747,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
RegCosts = TRI->getRegisterCosts(*MF);
+ ExtraInfo.emplace();
+ EvictAdvisor =
+ getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
@@ -2931,9 +2760,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
- ExtraInfo.emplace();
- EvictAdvisor =
- getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
+
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 932f263d2558..041d7e5b4a4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -143,7 +143,7 @@ namespace {
SelectionDAG &DAG;
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
- CombineLevel Level;
+ CombineLevel Level = BeforeLegalizeTypes;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
@@ -238,8 +238,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
- STI(D.getSubtarget().getSelectionDAGInfo()),
- Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
+ STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
@@ -441,6 +440,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitFunnelShift(SDNode *N);
+ SDValue visitSHLSAT(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
@@ -907,9 +907,8 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
return true;
}
- if (N.getOpcode() != ISD::SELECT_CC ||
- !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
- !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
+ !TLI.isConstFalseVal(N.getOperand(3)))
return false;
if (TLI.getBooleanContents(N.getValueType()) ==
@@ -1654,6 +1653,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ROTL: return visitRotate(N);
case ISD::FSHL:
case ISD::FSHR: return visitFunnelShift(N);
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: return visitSHLSAT(N);
case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -5530,8 +5531,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
// Some constants may need fixing up later if they are too large.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
- if (Mask->getValueType(0) != C->getValueType(0))
- return false;
if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
(Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
NodesWithConsts.insert(N);
@@ -5565,9 +5564,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT VT = Op.getOpcode() == ISD::AssertZext
- ? cast<VTSDNode>(Op.getOperand(1))->getVT()
- : Op.getOperand(0).getValueType();
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
// We can accept extending nodes if the mask is wider or an equal
// width to the original type.
@@ -5575,15 +5574,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
continue;
break;
}
- case ISD::ANY_EXTEND: {
- unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT VT = Op.getOperand(0).getValueType();
- if (ExtVT.bitsGE(VT))
- break;
- // Fallthrough to searching for nodes from the operands of the extend.
- LLVM_FALLTHROUGH;
- }
case ISD::OR:
case ISD::XOR:
case ISD::AND:
@@ -5643,14 +5633,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// masking.
if (FixupNode) {
LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
- SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
- FixupNode->getValueType(0));
- SDValue And =
- DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
- SDValue(FixupNode, 0), MaskOpT);
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
if (And.getOpcode() == ISD ::AND)
- DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
@@ -5659,12 +5647,10 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
- std::swap(Op0, Op1);
+ std::swap(Op0, Op1);
- SDValue MaskOpT =
- DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
- SDValue And =
- DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
@@ -5672,14 +5658,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
// Create narrow loads.
for (auto *Load : Loads) {
LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
- SDValue MaskOpT =
- DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
- SDValue(Load, 0), MaskOpT);
+ SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
if (And.getOpcode() == ISD ::AND)
And = SDValue(
- DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = reduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
@@ -8036,8 +8020,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
- if (TLI.isConstTrueVal(N1.getNode()) &&
- isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
+ if (TLI.isConstTrueVal(N1) &&
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -9348,6 +9332,22 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (SDValue V = DAG.simplifyShift(N0, N1))
+ return V;
+
+ EVT VT = N0.getValueType();
+
+ // fold (*shlsat c1, c2) -> c1<<c2
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
+ return C;
+
+ return SDValue();
+}
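
The new fold bottoms out in APInt's saturating shifts (dispatched to by the FoldValue cases added in SelectionDAG.cpp below); a quick demonstration of those semantics:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      APInt C1(8, 100), C2(8, 2);
      // 100 << 2 = 400 does not fit in 8 bits, so the result clamps:
      outs() << C1.sshl_sat(C2).getSExtValue() << '\n'; // 127 (signed max)
      outs() << C1.ushl_sat(C2).getZExtValue() << '\n'; // 255 (unsigned max)
      // In-range shifts behave like plain shl:
      outs() << APInt(8, 3).sshl_sat(C2).getSExtValue() << '\n'; // 12
    }
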
+
// Given a ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates UABD/SABD instruction.
@@ -14580,7 +14580,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
unsigned NumElts = 1;
EVT VT = N->getValueType(0);
if (VT.isVector() && DAG.isSplatValue(N1))
- NumElts = VT.getVectorNumElements();
+ NumElts = VT.getVectorMinNumElements();
if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index bfde35935c7b..d8ef79fe9a7b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1838,8 +1838,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
- SkipTargetIndependentISel(SkipTargetIndependentISel),
- LastLocalValue(nullptr), EmitStartPt(nullptr) {}
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
FastISel::~FastISel() = default;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 403f34573899..55f6f288f3e3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -47,8 +47,7 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
- InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+ : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -577,7 +576,7 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
// Construct a RegDefIter for this SUnit and find the first valid value.
ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
const ScheduleDAGSDNodes *SD)
- : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ : SchedDAG(SD), Node(SU->getNode()) {
InitNodeNumDefs();
Advance();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 8c28ce403c9b..99bbaeb19182 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -45,8 +45,8 @@ class InstrItineraryData;
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
- MachineBasicBlock *BB;
- SelectionDAG *DAG; // DAG of the current basic block
+ MachineBasicBlock *BB = nullptr;
+ SelectionDAG *DAG = nullptr; // DAG of the current basic block
const InstrItineraryData *InstrItins;
/// The schedule. Null SUnit*'s represent noop instructions.
@@ -138,8 +138,8 @@ class InstrItineraryData;
class RegDefIter {
const ScheduleDAGSDNodes *SchedDAG;
const SDNode *Node;
- unsigned DefIdx;
- unsigned NodeNumDefs;
+ unsigned DefIdx = 0;
+ unsigned NodeNumDefs = 0;
MVT ValueType;
public:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45f3005e8f57..d5998d166d25 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2449,7 +2449,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
switch (V.getOpcode()) {
default:
return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this, 0);
+ *this);
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -3082,6 +3082,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ // TODO: SelfMultiply can be poison, but not undef.
+ SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), DemandedElts, false, Depth + 1);
Known = KnownBits::mul(Known, Known2, SelfMultiply);
break;
}
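
Why the extra guard: `Op.getOperand(0) == Op.getOperand(1)` does not guarantee both uses observe the same value when the operand is undef, since each use of undef may independently take a different value, so the self-multiply facts in KnownBits::mul are only sound for a genuinely repeated value. A sketch of the fact being protected (assuming the three-argument KnownBits::mul overload invoked above):

    #include "llvm/Support/KnownBits.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      KnownBits X(8); // nothing known about x
      // With SelfMultiply set, bit 1 of the product should be known zero
      // even though nothing is known about x itself.
      KnownBits Sq = KnownBits::mul(X, X, /*SelfMultiply=*/true);
      outs() << "bit 1 known zero: " << Sq.Zero[1] << '\n';
    }
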
@@ -5240,6 +5243,8 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
case ISD::UADDSAT: return C1.uadd_sat(C2);
case ISD::SSUBSAT: return C1.ssub_sat(C2);
case ISD::USUBSAT: return C1.usub_sat(C2);
+ case ISD::SSHLSAT: return C1.sshl_sat(C2);
+ case ISD::USHLSAT: return C1.ushl_sat(C2);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 41460f78e1c2..01230a36e744 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4014,7 +4014,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
- uint64_t TySize = DL.getTypeAllocSize(Ty);
+ TypeSize TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
@@ -4023,9 +4023,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
- AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
- AllocSize,
- DAG.getConstant(TySize, dl, IntPtr));
+ if (TySize.isScalable())
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getVScale(dl, IntPtr,
+ APInt(IntPtr.getScalarSizeInBits(),
+ TySize.getKnownMinValue())));
+ else
+ AllocSize =
+ DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
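
For a scalable type, DL.getTypeAllocSize reports only the known-minimum byte count; the true allocation size is that minimum scaled by the runtime vscale, hence the ISD::VSCALE multiplier in the hunk above. A small sketch of the TypeSize distinction (the element types in the comments are assumptions for illustration):

    #include "llvm/Support/TypeSize.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      TypeSize Fixed = TypeSize::getFixed(16);       // e.g. <4 x i32>
      TypeSize Scalable = TypeSize::getScalable(16); // e.g. <vscale x 4 x i32>
      outs() << Fixed.getFixedValue() << " bytes\n";                // 16
      outs() << Scalable.getKnownMinValue() << " x vscale bytes\n"; // 16 * vscale
    }
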
@@ -6870,6 +6876,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
+ case Intrinsic::instrprof_cover:
+ llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 77e11b364588..3c786904620a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -319,7 +319,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
CurDAG(new SelectionDAG(tm, OL)),
SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
OL)),
- AA(), GFI(), OptLevel(OL), DAGSize(0) {
+ OptLevel(OL) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index e2db9633bfb9..dfda7d8b9f81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -990,6 +990,24 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
return ReturnVal;
}
+/// Return two gc.results if present. The first result is a block-local
+/// gc.result; the second is a non-block-local gc.result. The corresponding
+/// entry is nullptr if not present.
+static std::pair<const GCResultInst*, const GCResultInst*>
+getGCResultLocality(const GCStatepointInst &S) {
+ std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr);
+ for (auto *U : S.users()) {
+ auto *GRI = dyn_cast<GCResultInst>(U);
+ if (!GRI)
+ continue;
+ if (GRI->getParent() == S.getParent())
+ Res.first = GRI;
+ else
+ Res.second = GRI;
+ }
+ return Res;
+}
+
void
SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB /*= nullptr*/) {
@@ -1075,12 +1093,11 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const std::pair<bool, bool> GCResultLocality = I.getGCResultLocality();
- Type *RetTy = I.getActualReturnType();
+ const auto GCResultLocality = getGCResultLocality(I);
- if (RetTy->isVoidTy() ||
- (!GCResultLocality.first && !GCResultLocality.second)) {
- // The return value is not needed, just generate a poison value.
+ if (!GCResultLocality.first && !GCResultLocality.second) {
+    // The return value is not needed; just generate a poison value.
+ // Note: This covers the void return case.
setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
return;
}
@@ -1102,6 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
// manually.
// TODO: To eliminate this problem we can remove gc.result intrinsics
// completely and make statepoint call to return a tuple.
+ Type *RetTy = GCResultLocality.second->getType();
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
@@ -1168,7 +1186,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// register because statepoint and actual call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
// which is always i32 in our case.
- Type *RetTy = SI->getActualReturnType();
+ Type *RetTy = CI.getType();
SDValue CopyFromReg = getCopyFromRegs(SI, RetTy);
assert(CopyFromReg.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a98c21f16c71..f6d1fa87676f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -63,7 +63,7 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
Attribute::DereferenceableOrNull, Attribute::NoAlias,
- Attribute::NonNull})
+ Attribute::NonNull, Attribute::NoUndef})
CallerAttrs.removeAttribute(Attr);
if (CallerAttrs.hasAttributes())
@@ -606,6 +606,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified =
+ SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
+ if (Simplified) {
+ DCI.AddToWorklist(Op.getNode());
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return Simplified;
+}
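
A compile-only sketch of how a target combine would call the new overload; `DCI` and the demanded masks come from the combiner, and the overload itself takes care of adding Op to the worklist and committing the simplification:

    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    // Call shape inside a target's PerformDAGCombine (no main on purpose).
    static bool trySimplify(const TargetLowering &TLI, SDValue Op,
                            const APInt &DemandedBits, const APInt &DemandedElts,
                            TargetLowering::DAGCombinerInfo &DCI) {
      // Returns true once the simplification has been committed through DCI.
      return TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, DCI);
    }
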
+
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
@@ -2247,8 +2264,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
- case ISD::ADD:
case ISD::MUL:
+ // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
+ if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
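
The fold is sound because bit 1 of a square is always clear: writing x = 2k + b with b the low bit, x·x = 4(k² + kb) + b², so x·x mod 4 is either 0 or 1. When bit 1 is the only demanded bit, the whole multiply therefore folds to 0 (the 'Quadratic Reciprocity' name is tongue-in-cheek). An exhaustive 8-bit check:

    #include <cstdint>
    #include <iostream>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        if ((X * X) & 2u) { // test bit 1 of the square
          std::cout << "counterexample: " << X << '\n';
          return 1;
        }
      std::cout << "bit 1 of x*x is clear for every 8-bit x\n";
      return 0;
    }
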
@@ -3173,29 +3194,25 @@ bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
-bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+bool TargetLowering::isConstTrueVal(SDValue N) const {
if (!N)
return false;
+ unsigned EltWidth;
APInt CVal;
- if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
CVal = CN->getAPIntValue();
- } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
- auto *CN = BV->getConstantSplatNode();
- if (!CN)
- return false;
-
- // If this is a truncating build vector, truncate the splat value.
- // Otherwise, we may fail to match the expected values below.
- unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
- CVal = CN->getAPIntValue();
- if (BVEltWidth < CVal.getBitWidth())
- CVal = CVal.trunc(BVEltWidth);
- } else {
+ EltWidth = N.getValueType().getScalarSizeInBits();
+ } else
return false;
- }
- switch (getBooleanContents(N->getValueType(0))) {
+ // If this is a truncating splat, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ if (EltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(EltWidth);
+
+ switch (getBooleanContents(N.getValueType())) {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
@@ -3207,7 +3224,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
-bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+bool TargetLowering::isConstFalseVal(SDValue N) const {
if (!N)
return false;
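
Which constant counts as "true" or "false" depends on the target's boolean contents; the three conventions the switch above dispatches over can be illustrated directly with APInt (a sketch of the predicates, not of TargetLowering itself):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      APInt One(8, 1);
      APInt AllOnes = APInt::getAllOnes(8);
      outs() << One.isOne() << '\n';        // true under ZeroOrOneBooleanContent
      outs() << AllOnes.isAllOnes() << '\n'; // true under ZeroOrNegativeOne...
      outs() << AllOnes[0] << '\n';          // UndefinedBooleanContent: bit 0 only
    }
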
@@ -3742,7 +3759,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
TopSetCC.getOpcode() == ISD::SETCC &&
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
- (isConstFalseVal(N1C) ||
+ (isConstFalseVal(N1) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index c933031ef37d..ffac68a223bf 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
char SlotIndexes::ID = 0;
-SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
+SlotIndexes::SlotIndexes() : MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 1ab6ead3b5f6..99e12fce6513 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,7 +27,8 @@
namespace llvm {
-bool DwarfStreamer::init(Triple TheTriple) {
+bool DwarfStreamer::init(Triple TheTriple,
+ StringRef Swift5ReflectionSegmentName) {
std::string ErrorStr;
std::string TripleName;
StringRef Context = "dwarf streamer init";
@@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) {
if (!MSTI)
return error("no subtarget info for target " + TripleName, Context), false;
- MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
- MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
+ MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+ nullptr, true, Swift5ReflectionSegmentName));
+ MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
MC->setObjectFileInfo(MOFI.get());
MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
MS->emitBytes(Buffer);
}
+void DwarfStreamer::emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) {
+ MCSection *ReflectionSection =
+ MOFI->getSwift5ReflectionSection(ReflSectionKind);
+ if (ReflectionSection == nullptr)
+ return;
+ ReflectionSection->setAlignment(Align(Alignment));
+ MS->SwitchSection(ReflectionSection);
+ MS->emitBytes(Buffer);
+}
+
/// Emit the debug_range section contents for \p FuncRange by
/// translating the original \p Entries. The debug_range section
/// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3b8d80c4eeec..99001269e1f8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2866,6 +2866,90 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
return Builder.CreateCall(Fn, Args, Name);
}
+CallInst *OpenMPIRBuilder::createOMPInteropInit(
+ const LocationDescription &Loc, Value *InteropVar,
+ omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
+ Value *DependenceAddress, bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ Constant *InteropTypeVal = ConstantInt::get(Int64, (int)InteropType);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, InteropTypeVal,
+ Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
+
+ return Builder.CreateCall(Fn, Args);
+}
+
+CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
+ const LocationDescription &Loc, Value *InteropVar, Value *Device,
+ Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, Device,
+ NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
+
+ return Builder.CreateCall(Fn, Args);
+}
+
+CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
+ Value *InteropVar, Value *Device,
+ Value *NumDependences,
+ Value *DependenceAddress,
+ bool HaveNowaitClause) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ Builder.restoreIP(Loc.IP);
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ if (Device == nullptr)
+ Device = ConstantInt::get(Int32, -1);
+ if (NumDependences == nullptr) {
+ NumDependences = ConstantInt::get(Int32, 0);
+ PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
+ DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
+ }
+ Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
+ Value *Args[] = {
+ Ident, ThreadId, InteropVar, Device,
+ NumDependences, DependenceAddress, HaveNowaitClauseVal};
+
+ Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
+
+ return Builder.CreateCall(Fn, Args);
+}
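
A compile-only sketch of driving the three new entry points for one interop object; the builder, insertion location, interop variable, and interop type all come from the caller, and the nullptr arguments deliberately select the defaults the builders install (device -1, zero dependences):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static void emitInteropLifetime(OpenMPIRBuilder &OMPB,
                                    const OpenMPIRBuilder::LocationDescription &Loc,
                                    Value *InteropVar, omp::OMPInteropType Ty) {
      OMPB.createOMPInteropInit(Loc, InteropVar, Ty, /*Device=*/nullptr,
                                /*NumDependences=*/nullptr,
                                /*DependenceAddress=*/nullptr,
                                /*HaveNowaitClause=*/false);
      OMPB.createOMPInteropUse(Loc, InteropVar, nullptr, nullptr, nullptr, false);
      OMPB.createOMPInteropDestroy(Loc, InteropVar, nullptr, nullptr, nullptr,
                                   false);
    }
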
+
CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
const LocationDescription &Loc, llvm::Value *Pointer,
llvm::ConstantInt *Size, const llvm::Twine &Name) {
@@ -3138,7 +3222,7 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic read expected a scalar type");
@@ -3180,7 +3264,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic write expected a scalar type");
@@ -3216,7 +3300,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
Type *XTy = X.Var->getType();
assert(XTy->isPointerTy() &&
"OMP Atomic expects a pointer to target memory");
- Type *XElemTy = XTy->getPointerElementType();
+ Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
"OMP atomic update expected a scalar type");
@@ -3225,8 +3309,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
"OpenMP atomic does not support LT or GT operations");
});
- emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
- IsXBinopExpr);
+ emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
+ X.IsVolatile, IsXBinopExpr);
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
return Builder.saveIP();
}
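
The recurring `XTy->getPointerElementType()` → `X.ElemTy` change is opaque-pointer preparation: the element type can no longer be recovered from the pointer operand's type, so callers must carry it alongside the pointer. A compile-only sketch of populating the operand (assuming AtomicOpValue exposes Var and ElemTy as the hunks above read them):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static OpenMPIRBuilder::AtomicOpValue makeAtomicOperand(Value *Ptr,
                                                            Type *ElemTy) {
      OpenMPIRBuilder::AtomicOpValue X;
      X.Var = Ptr;      // pointer to the target memory
      X.ElemTy = ElemTy; // element type, supplied explicitly by the caller
      return X;
    }
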
@@ -3259,13 +3343,10 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
llvm_unreachable("Unsupported atomic update operation");
}
-std::pair<Value *, Value *>
-OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
- AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp,
- bool VolatileX, bool IsXBinopExpr) {
- Type *XElemTy = X->getType()->getPointerElementType();
-
+std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
+ Instruction *AllocIP, Value *X, Type *XElemTy, Value *Expr,
+ AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
+ AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
bool DoCmpExch =
((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
(RMWOp == AtomicRMWInst::FSub) ||
@@ -3380,8 +3461,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
// If UpdateExpr is 'x' updated with some `expr` not based on 'x',
// 'x' is simply atomically rewritten with 'expr'.
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
- std::pair<Value *, Value *> Result = emitAtomicUpdate(
- AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr);
+ std::pair<Value *, Value *> Result =
+ emitAtomicUpdate(AllocIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
+ X.IsVolatile, IsXBinopExpr);
Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index c92bacaee36d..43fde64c3734 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -31,11 +30,9 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <climits>
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -390,26 +387,15 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
// align=4
// alignstack=8
//
- if (hasAttribute(Attribute::Alignment)) {
- std::string Result;
- Result += "align";
- Result += (InAttrGrp) ? "=" : " ";
- Result += utostr(getValueAsInt());
- return Result;
- }
+ if (hasAttribute(Attribute::Alignment))
+ return (InAttrGrp ? "align=" + Twine(getValueAsInt())
+ : "align " + Twine(getValueAsInt()))
+ .str();
auto AttrWithBytesToString = [&](const char *Name) {
- std::string Result;
- Result += Name;
- if (InAttrGrp) {
- Result += "=";
- Result += utostr(getValueAsInt());
- } else {
- Result += "(";
- Result += utostr(getValueAsInt());
- Result += ")";
- }
- return Result;
+ return (InAttrGrp ? Name + ("=" + Twine(getValueAsInt()))
+ : Name + ("(" + Twine(getValueAsInt())) + ")")
+ .str();
};
if (hasAttribute(Attribute::StackAlignment))
@@ -426,26 +412,18 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
Optional<unsigned> NumElems;
std::tie(ElemSize, NumElems) = getAllocSizeArgs();
- std::string Result = "allocsize(";
- Result += utostr(ElemSize);
- if (NumElems.hasValue()) {
- Result += ',';
- Result += utostr(*NumElems);
- }
- Result += ')';
- return Result;
+ return (NumElems
+ ? "allocsize(" + Twine(ElemSize) + "," + Twine(*NumElems) + ")"
+ : "allocsize(" + Twine(ElemSize) + ")")
+ .str();
}
if (hasAttribute(Attribute::VScaleRange)) {
unsigned MinValue = getVScaleRangeMin();
Optional<unsigned> MaxValue = getVScaleRangeMax();
-
- std::string Result = "vscale_range(";
- Result += utostr(MinValue);
- Result += ',';
- Result += utostr(MaxValue.getValueOr(0));
- Result += ')';
- return Result;
+ return ("vscale_range(" + Twine(MinValue) + "," +
+ Twine(MaxValue.getValueOr(0)) + ")")
+ .str();
}
// Convert target-dependent attributes to strings of the form:
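
The rewrite replaces repeated std::string appends with a single Twine expression. One subtlety worth noting: `const char * + const char *` does not concatenate, so the inner parentheses in AttrWithBytesToString first build a Twine, after which the raw Name pointer can be prepended. A small demonstration (attribute name and value are illustrative):

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      const char *Name = "dereferenceable"; // illustrative attribute name
      unsigned Bytes = 8;
      // The inner group ("(" + Twine(Bytes)) is a Twine, so the outer +
      // operators resolve against Twine rather than raw pointers.
      std::string S = (Name + ("(" + Twine(Bytes)) + ")").str();
      outs() << S << '\n'; // dereferenceable(8)
    }
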
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 45459e200b3d..11839c7572e3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -15,7 +15,6 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 7beafc485d09..99e3afaa8ba8 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -12,15 +12,14 @@
#include "llvm/IR/BasicBlock.h"
#include "SymbolTableListTraitsImpl.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/IR/Comdat.cpp b/llvm/lib/IR/Comdat.cpp
index 90d5c6e82e5c..2cd6db913621 100644
--- a/llvm/lib/IR/Comdat.cpp
+++ b/llvm/lib/IR/Comdat.cpp
@@ -11,11 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Comdat.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMapEntry.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 622a984be22c..936b1fc2ff6f 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -30,8 +30,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
using namespace llvm::PatternMatch;
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index c13990af360e..b862a159127f 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -16,16 +16,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 43df15e4d932..7ed156d552b1 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -13,6 +13,7 @@
#include "llvm-c/Core.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
@@ -27,6 +28,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index a6e84dfbe1dd..dc5768dd4f26 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -13,12 +13,10 @@
#include "llvm/IR/DIBuilder.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 61b2b13bfd03..96f55cf14de8 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -30,12 +30,13 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
-#include <tuple>
+#include <new>
#include <utility>
using namespace llvm;
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 98f25b035157..fd4b4170c0a7 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -14,17 +14,16 @@
#include "llvm-c/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 59afb844eb89..b9fc5261fefe 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -15,9 +15,9 @@
#include "MetadataImpl.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include <numeric>
diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp
index 993f3a39e6ff..34c9d026b19a 100644
--- a/llvm/lib/IR/DebugLoc.cpp
+++ b/llvm/lib/IR/DebugLoc.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DebugLoc.h"
-#include "LLVMContextImpl.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfo.h"
using namespace llvm;
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 0a872a81f911..f46f0fdd947d 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/DiagnosticInfo.h"
-#include "LLVMContextImpl.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
@@ -24,22 +23,19 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Regex.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <atomic>
-#include <cassert>
-#include <memory>
#include <string>
using namespace llvm;
diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp
index ace708b252c7..aac8936c7bd6 100644
--- a/llvm/lib/IR/Dominators.cpp
+++ b/llvm/lib/IR/Dominators.cpp
@@ -14,19 +14,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
+
+#include <cassert>
+
+namespace llvm {
+class Argument;
+class Constant;
+class Value;
+} // namespace llvm
using namespace llvm;
bool llvm::VerifyDomInfo = false;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 1e874d7afa79..726ba80da41b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -30,7 +30,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
@@ -63,7 +62,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index c832499dde06..47e8bc0a916d 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 27528a69be21..4e8f1b506811 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -29,7 +29,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <vector>
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index a0c48781ced5..203ad6dae1ff 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -22,7 +22,6 @@
#include <algorithm>
#include <cassert>
#include <cctype>
-#include <cstddef>
#include <cstdlib>
using namespace llvm;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 59b7221d1fa2..36a20679863b 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -16,7 +16,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
using namespace llvm;
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index adea7abb75cf..e27758c5de02 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -24,14 +24,12 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 90716d9c81a6..e19ead98a616 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -20,8 +20,6 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index ebbf382aea38..8f9530290459 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -11,12 +11,24 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
+#include "AttributeImpl.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMapEntry.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/DiagnosticHandler.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <utility>
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 0b5f928165e8..70242f4d8f20 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_IR_LLVMCONTEXTIMPL_H
#define LLVM_LIB_IR_LLVMCONTEXTIMPL_H
-#include "AttributeImpl.h"
#include "ConstantsContext.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -34,13 +33,14 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -52,9 +52,23 @@
namespace llvm {
+class AttributeImpl;
+class AttributeListImpl;
+class AttributeSetNode;
+class BasicBlock;
+struct DiagnosticHandler;
+class ElementCount;
+class Function;
+class GlobalObject;
+class GlobalValue;
+class InlineAsm;
+class LLVMRemarkStreamer;
+class OptPassGate;
+namespace remarks {
+class RemarkStreamer;
+}
+template <typename T> class StringMapEntry;
class StringRef;
-class Type;
-class Value;
class ValueHandleBase;
using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
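
Annotation: a large share of this commit is the same include-hygiene move seen in this hunk — a header keeps only forward declarations for types it mentions by pointer or reference, and the heavy #include sinks into the few .cpp files that need the complete type. A minimal self-contained sketch of when a forward declaration suffices (hypothetical Widget, not from this patch):

// Forward-declaration sketch: a pointer, reference, or function signature
// only needs the *name* of the type, so no #include is required here.
class Widget; // replaces #include "Widget.h" in this header

struct Registry {
  Widget *Active = nullptr; // OK: pointer to an incomplete type
  void track(Widget &W);    // OK: reference parameter, declaration only
};
// Only the translation unit that actually dereferences a Widget needs the
// full definition, so Widget.h moves from this header into that one .cpp.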
diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp
index 21ce47457f52..f7e2aa4e9a35 100644
--- a/llvm/lib/IR/LLVMRemarkStreamer.cpp
+++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp
@@ -15,7 +15,9 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 4357c95aa9f6..08cf909a83f9 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -12,28 +12,27 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
-#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/PrintPasses.h"
-#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <unordered_set>
+
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/StructuralHash.h"
+#endif
+
using namespace llvm;
// See PassManagers.h for Pass Manager infrastructure overview.
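
Annotation: StructuralHash.h is now pulled in only under EXPENSIVE_CHECKS, since (as far as this diff shows) its only consumer in this file is expensive-checks-only verification. A hedged sketch of that pattern — the names FP and F and the exact check are assumptions, not the literal call site in LegacyPassManager.cpp:

#include <cassert>
#include <cstdint>
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/StructuralHash.h"
#endif

// Sketch: fingerprint the IR before a pass runs, and assert that a pass
// reporting "no change" really left the function untouched.
static bool runAndCheck(llvm::FunctionPass &FP, llvm::Function &F) {
#ifdef EXPENSIVE_CHECKS
  uint64_t RefHash = llvm::StructuralHash(F); // IR fingerprint before the pass
#endif
  bool Changed = FP.runOnFunction(F);
#ifdef EXPENSIVE_CHECKS
  assert((Changed || llvm::StructuralHash(F) == RefHash) &&
         "pass modified IR without reporting a change");
#endif
  return Changed;
}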
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index ebcc493407cc..226718ecac28 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
-#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -44,7 +43,6 @@
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -52,8 +50,6 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
-#include <iterator>
-#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index a0485a59d0e0..4974b372db2a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Module.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -39,7 +38,6 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index a0ac7d3ad7d3..0ca40a675fe4 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -14,7 +14,6 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 08c1fc931e2e..c2a4a7c29915 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -14,7 +14,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "ConstantsContext.h"
diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp
index 55c0dbad5aab..418311eac814 100644
--- a/llvm/lib/IR/OptBisect.cpp
+++ b/llvm/lib/IR/OptBisect.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <limits>
using namespace llvm;
diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp
index d933003ccdf7..3025c3853d5f 100644
--- a/llvm/lib/IR/PassManager.cpp
+++ b/llvm/lib/IR/PassManager.cpp
@@ -7,10 +7,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/PassManager.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/PassManagerImpl.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp
index 05d5ac2c5ddf..9f7335ecbe44 100644
--- a/llvm/lib/IR/ProfileSummary.cpp
+++ b/llvm/lib/IR/ProfileSummary.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/ProfileSummary.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 101cada77ff9..5cad887b295d 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -15,7 +15,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
-#include <unordered_set>
+#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index cfd8deba5a53..d2f676192e7f 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/ReplaceConstant.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueMap.h"
namespace llvm {
diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp
index a96e39f32882..5b865692dd7f 100644
--- a/llvm/lib/IR/SSAContext.cpp
+++ b/llvm/lib/IR/SSAContext.cpp
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/SSAContext.h"
-#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index 2117527a64f0..d8634e0ac7dd 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -38,10 +38,8 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp
index b5916e4937c6..508e3cb71ed2 100644
--- a/llvm/lib/IR/Statepoint.cpp
+++ b/llvm/lib/IR/Statepoint.cpp
@@ -13,8 +13,6 @@
#include "llvm/IR/Statepoint.h"
-#include "llvm/IR/Function.h"
-
using namespace llvm;
bool llvm::isStatepointDirectiveAttr(Attribute Attr) {
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index d59d87ad631b..85b658c8a52f 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -21,10 +21,8 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp
index 99049c0232aa..601a9df5279e 100644
--- a/llvm/lib/IR/Use.cpp
+++ b/llvm/lib/IR/Use.cpp
@@ -8,11 +8,13 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include <new>
namespace llvm {
+class User;
+template <typename> struct simplify_type;
+class Value;
+
void Use::swap(Use &RHS) {
if (Val == RHS.Val)
return;
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 8741ed917f9f..18aef37e2023 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -13,7 +13,6 @@
#include "llvm/IR/Value.h"
#include "LLVMContextImpl.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -21,7 +20,6 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DerivedUser.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -32,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b84edb789405..989d01e2e395 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -58,7 +58,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/ilist.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -70,7 +69,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -5811,15 +5809,11 @@ void Verifier::verifyAttachedCallBundle(const CallBase &Call,
"void return type",
Call);
- Assert((BU.Inputs.empty() ||
- (BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()))),
- "operand bundle \"clang.arc.attachedcall\" can take either no "
- "arguments or one function as an argument",
+ Assert(BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()),
+ "operand bundle \"clang.arc.attachedcall\" requires one function as "
+ "an argument",
Call);
- if (BU.Inputs.empty())
- return;
-
auto *Fn = cast<Function>(BU.Inputs.front());
Intrinsic::ID IID = Fn->getIntrinsicID();
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index f26ef4b21996..418aad26fdd6 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -1372,7 +1373,7 @@ public:
sys::fs::OpenFlags::OF_None);
if (EC)
return errorCodeToError(EC);
- WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+ writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
if (ShouldEmitImportsFiles) {
EC = EmitImportsFiles(ModulePath, NewModulePath + ".imports",
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 7694c9848384..3877def53c3f 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
@@ -144,7 +145,7 @@ Error Config::addSaveTemps(std::string OutputFileName,
// directly and exit.
if (EC)
reportOpenError(Path, EC.message());
- WriteIndexToFile(Index, OS);
+ writeIndexToFile(Index, OS);
Path = OutputFileName + "index.dot";
raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::OF_None);
@@ -359,7 +360,7 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
LLVM_DEBUG(
dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
"command line arguments are not available");
- llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
/*Cmdline*/ CmdArgs);
}
@@ -380,7 +381,7 @@ static void codegen(const Config &Conf, TargetMachine *TM,
return;
if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
- llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ llvm::embedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
/*EmbedBitcode*/ true,
/*EmbedCmdline*/ false,
/*CmdArgs*/ std::vector<uint8_t>());
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 9aea27f0fdba..37e85b6af6ba 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1052,7 +1052,7 @@ void ThinLTOCodeGenerator::run() {
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
- WriteIndexToFile(*Index, OS);
+ writeIndexToFile(*Index, OS);
}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 119237bb052e..61ec941f50b8 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -788,7 +788,7 @@ void MCAsmStreamer::emitSyntaxDirective() {
}
void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- OS << "\t.def\t ";
+ OS << "\t.def\t";
Symbol->print(OS, MAI);
OS << ';';
EmitEOL();
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 7f639e9c408f..eafcee1e0607 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
- bool DoAutoReset)
- : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
- DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
- Symbols(Allocator), UsedNames(Allocator),
+ bool DoAutoReset, StringRef Swift5ReflSegmentName)
+ : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
+ SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
+ MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
InlineAsmUsedLabelNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index d7f85f793c55..b7890e7f0937 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,6 +299,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
RemarksSection = Ctx->getMachOSection(
"__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
+ // The architecture of dsymutil makes it very difficult to copy the Swift
+ // reflection metadata sections into the __TEXT segment, so dsymutil creates
+ // these sections in the __DWARF segment instead.
+ if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \
+ Swift5ReflectionSections \
+ [llvm::binaryformat::Swift5ReflectionSectionKind::KIND] = \
+ Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(), \
+ MACHO, 0, SectionKind::getMetadata());
+#include "llvm/BinaryFormat/Swift.def"
+ }
+
TLSExtraDataSection = TLSTLVSection;
}
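
Annotation: the HANDLE_SWIFT_SECTION hunk above uses the X-macro idiom — Swift.def is a table of HANDLE_SWIFT_SECTION rows, and each include site defines the macro to stamp out different code from the same rows. A self-contained sketch of the idiom with a hypothetical inline table instead of a .def file:

#include <cstdio>

// One table, many expansions: each use site redefines the row macro.
#define COLOR_TABLE(X) X(Red, "red") X(Green, "green") X(Blue, "blue")

#define AS_ENUM(NAME, STR) NAME,
enum class Color { COLOR_TABLE(AS_ENUM) };
#undef AS_ENUM

#define AS_CASE(NAME, STR)                                                   \
  case Color::NAME:                                                          \
    return STR;
static const char *name(Color C) {
  switch (C) { COLOR_TABLE(AS_CASE) }
  return "unknown";
}
#undef AS_CASE

int main() { std::printf("%s\n", name(Color::Green)); } // prints "green"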
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 42e257516f4e..3d95b18f4672 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -4765,3 +4766,15 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
Path.str().c_str());
return ObjectPaths;
}
+
+llvm::binaryformat::Swift5ReflectionSectionKind
+MachOObjectFile::mapReflectionSectionNameToEnumValue(
+ StringRef SectionName) const {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \
+ .Case(MACHO, llvm::binaryformat::Swift5ReflectionSectionKind::KIND)
+ return StringSwitch<llvm::binaryformat::Swift5ReflectionSectionKind>(
+ SectionName)
+#include "llvm/BinaryFormat/Swift.def"
+ .Default(llvm::binaryformat::Swift5ReflectionSectionKind::unknown);
+#undef HANDLE_SWIFT_SECTION
+}
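
Annotation: the new mapper builds its .Case list out of the same Swift.def table; llvm::StringSwitch chains .Case(...) calls and yields the first match, falling through to .Default. A hand-written sketch of the same shape (the section names here are illustrative, not taken from Swift.def):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Kind { FieldMD, AssocTy, Unknown };

// First matching .Case wins; .Default covers everything else.
static Kind classify(llvm::StringRef Name) {
  return llvm::StringSwitch<Kind>(Name)
      .Case("__swift5_fieldmd", Kind::FieldMD) // illustrative names
      .Case("__swift5_assocty", Kind::AssocTy)
      .Default(Kind::Unknown);
}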
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ffe2599beaf8..d597148b98ab 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -579,6 +579,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
EF_AMDGPU_FEATURE_XNACK_V4);
BCaseMask(EF_AMDGPU_FEATURE_XNACK_ANY_V4,
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 80a8c56f6912..2aa2ef3e5541 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -585,19 +585,8 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec,
writeUint8(OS, Reloc.Type);
encodeULEB128(Reloc.Offset, OS);
encodeULEB128(Reloc.Index, OS);
- switch (Reloc.Type) {
- case wasm::R_WASM_MEMORY_ADDR_LEB:
- case wasm::R_WASM_MEMORY_ADDR_LEB64:
- case wasm::R_WASM_MEMORY_ADDR_SLEB:
- case wasm::R_WASM_MEMORY_ADDR_SLEB64:
- case wasm::R_WASM_MEMORY_ADDR_I32:
- case wasm::R_WASM_MEMORY_ADDR_I64:
- case wasm::R_WASM_FUNCTION_OFFSET_I32:
- case wasm::R_WASM_FUNCTION_OFFSET_I64:
- case wasm::R_WASM_SECTION_OFFSET_I32:
+ if (wasm::relocTypeHasAddend(Reloc.Type))
encodeSLEB128(Reloc.Addend, OS);
- break;
- }
}
}
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6110bda02406..93637c890c4f 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1454,6 +1454,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
+  // Try to run OpenMP optimizations; this is a quick no-op if no OpenMP
+  // metadata is present.
+ MPM.addPass(OpenMPOptPass());
+
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
MPM.addPass(GlobalDCEPass());
@@ -1648,6 +1651,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
+ // Run the OpenMPOpt CGSCC pass again late.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
+
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 051655e1fed6..07d467305ae5 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1181,32 +1181,6 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
return true;
}
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled,
- bool DebugInfoCorrelate) {
- const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- Type *IntTy64 = Type::getInt64Ty(M.getContext());
- uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
- if (IsCS)
- ProfileVersion |= VARIANT_MASK_CSIR_PROF;
- if (InstrEntryBBEnabled)
- ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
- if (DebugInfoCorrelate)
- ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
- auto IRLevelVersionVariable = new GlobalVariable(
- M, IntTy64, true, GlobalValue::WeakAnyLinkage,
- Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
- IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
- Triple TT(M.getTargetTriple());
- if (TT.supportsCOMDAT()) {
- IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
- IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
- }
- return IRLevelVersionVariable;
-}
-
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
if (InstrProfileOutput.empty())
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 861ff61df510..138b1532d778 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -38,6 +38,28 @@
using namespace llvm;
+// Extracts the variant information from the top 8 bits in the version and
+// returns an enum specifying the variants present.
+static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
+ InstrProfKind ProfileKind = InstrProfKind::Unknown;
+ if (Version & VARIANT_MASK_IR_PROF) {
+ ProfileKind |= InstrProfKind::IR;
+ }
+ if (Version & VARIANT_MASK_CSIR_PROF) {
+ ProfileKind |= InstrProfKind::CS;
+ }
+ if (Version & VARIANT_MASK_INSTR_ENTRY) {
+ ProfileKind |= InstrProfKind::BB;
+ }
+ if (Version & VARIANT_MASK_BYTE_COVERAGE) {
+ ProfileKind |= InstrProfKind::SingleByteCoverage;
+ }
+ if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
+ ProfileKind |= InstrProfKind::FunctionEntryOnly;
+ }
+ return ProfileKind;
+}
+
static Expected<std::unique_ptr<MemoryBuffer>>
setupMemoryBuffer(const Twine &Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
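
Annotation: getProfileKindFromVersion accumulates flags with |= on an enum class, which only compiles because InstrProfKind is declared as a bitmask enum (LLVM typically does this with LLVM_MARK_AS_BITMASK_ENUM). A self-contained sketch of the operators such a declaration supplies, with hypothetical flags:

#include <cstdint>

enum class Flags : uint8_t {
  None = 0,
  IRLike = 1 << 0, // hypothetical stand-ins for the InstrProfKind bits
  CSLike = 1 << 1,
};

constexpr Flags operator|(Flags L, Flags R) {
  return static_cast<Flags>(static_cast<uint8_t>(L) | static_cast<uint8_t>(R));
}
constexpr Flags operator&(Flags L, Flags R) {
  return static_cast<Flags>(static_cast<uint8_t>(L) & static_cast<uint8_t>(R));
}
constexpr Flags operator~(Flags F) {
  return static_cast<Flags>(~static_cast<uint8_t>(F) & 0x3);
}
inline Flags &operator|=(Flags &L, Flags R) { return L = L | R; }
inline Flags &operator&=(Flags &L, Flags R) { return L = L & R; }
// Accumulate with |=, clear with &= ~Bit, test with (K & Bit) != None --
// the same shapes the reader code above uses on InstrProfKind.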
@@ -154,30 +176,24 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
// with a leading ':' will be reported an error format.
Error TextInstrProfReader::readHeader() {
Symtab.reset(new InstrProfSymtab());
- bool IsIRInstr = false;
- bool IsEntryFirst = false;
- bool IsCS = false;
while (Line->startswith(":")) {
StringRef Str = Line->substr(1);
if (Str.equals_insensitive("ir"))
- IsIRInstr = true;
+ ProfileKind |= InstrProfKind::IR;
else if (Str.equals_insensitive("fe"))
- IsIRInstr = false;
+ ProfileKind |= InstrProfKind::FE;
else if (Str.equals_insensitive("csir")) {
- IsIRInstr = true;
- IsCS = true;
+ ProfileKind |= InstrProfKind::IR;
+ ProfileKind |= InstrProfKind::CS;
} else if (Str.equals_insensitive("entry_first"))
- IsEntryFirst = true;
+ ProfileKind |= InstrProfKind::BB;
else if (Str.equals_insensitive("not_entry_first"))
- IsEntryFirst = false;
+ ProfileKind &= ~InstrProfKind::BB;
else
return error(instrprof_error::bad_header);
++Line;
}
- IsIRLevelProfile = IsIRInstr;
- InstrEntryBBEnabled = IsEntryFirst;
- HasCSIRLevelProfile = IsCS;
return success();
}
@@ -304,6 +320,11 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
}
template <class IntPtrT>
+InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
+ return getProfileKindFromVersion(Version);
+}
+
+template <class IntPtrT>
bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
if (DataBuffer.getBufferSize() < sizeof(uint64_t))
return false;
@@ -485,9 +506,15 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
Record.Counts.clear();
Record.Counts.reserve(NumCounters);
for (uint32_t I = 0; I < NumCounters; I++) {
- const auto *CounterValue = reinterpret_cast<const uint64_t *>(
- CountersStart + CounterBaseOffset + I * getCounterTypeSize());
- Record.Counts.push_back(swap(*CounterValue));
+ const char *Ptr =
+ CountersStart + CounterBaseOffset + I * getCounterTypeSize();
+ if (hasSingleByteCoverage()) {
+ // A value of zero signifies the block is covered.
+ Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
+ } else {
+ const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
+ Record.Counts.push_back(swap(*CounterValue));
+ }
}
return success();
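
Annotation: the counter stride now depends on the profile kind — single-byte coverage packs one byte per counter and encodes "executed" as zero, while the classic format stores 64-bit counts. A hedged sketch of the decode step in isolation (it ignores the byte-order and offset handling the real reader does):

#include <cstdint>
#include <vector>

// Decode a raw counter region; assumes suitably aligned input for the
// 64-bit case, which this sketch does not defend against.
std::vector<uint64_t> decodeCounters(const char *Start, uint32_t N,
                                     bool SingleByteCoverage) {
  std::vector<uint64_t> Counts;
  size_t Stride = SingleByteCoverage ? 1 : sizeof(uint64_t);
  for (uint32_t I = 0; I < N; ++I) {
    const char *Ptr = Start + I * Stride;
    if (SingleByteCoverage)
      Counts.push_back(*Ptr == 0 ? 1 : 0); // zero byte == block was covered
    else
      Counts.push_back(*reinterpret_cast<const uint64_t *>(Ptr));
  }
  return Counts;
}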
@@ -718,6 +745,11 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
RecordIterator = HashTable->data_begin();
}
+template <typename HashTableImpl>
+InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
+ return getProfileKindFromVersion(FormatVersion);
+}
+
namespace {
/// A remapper that does not apply any remappings.
class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6628eea80640..8ded1c0426e5 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -166,9 +166,8 @@ public:
} // end namespace llvm
-InstrProfWriter::InstrProfWriter(bool Sparse, bool InstrEntryBBEnabled)
- : Sparse(Sparse), InstrEntryBBEnabled(InstrEntryBBEnabled),
- InfoObj(new InstrProfRecordWriterTrait()) {}
+InstrProfWriter::InstrProfWriter(bool Sparse)
+ : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -303,14 +302,16 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
IndexedInstrProf::Header Header;
Header.Magic = IndexedInstrProf::Magic;
Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
- if (ProfileKind == PF_IRLevel)
- Header.Version |= VARIANT_MASK_IR_PROF;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
Header.Version |= VARIANT_MASK_IR_PROF;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
Header.Version |= VARIANT_MASK_CSIR_PROF;
- }
- if (InstrEntryBBEnabled)
+ if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
Header.Version |= VARIANT_MASK_INSTR_ENTRY;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
+ Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
+ Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
@@ -337,7 +338,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.write(0);
uint64_t CSSummaryOffset = 0;
uint64_t CSSummarySize = 0;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
CSSummaryOffset = OS.tell();
CSSummarySize = SummarySize / sizeof(uint64_t);
for (unsigned I = 0; I < CSSummarySize; I++)
@@ -358,7 +359,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// For Context Sensitive summary.
std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
- if (ProfileKind == PF_IRLevelWithCS) {
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) {
TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
setSummary(TheCSSummary.get(), *CSPS);
@@ -470,11 +471,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
}
Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
- if (ProfileKind == PF_IRLevel)
- OS << "# IR level Instrumentation Flag\n:ir\n";
- else if (ProfileKind == PF_IRLevelWithCS)
+ // Check CS first since it implies an IR level profile.
+ if (static_cast<bool>(ProfileKind & InstrProfKind::CS))
OS << "# CSIR level Instrumentation Flag\n:csir\n";
- if (InstrEntryBBEnabled)
+ else if (static_cast<bool>(ProfileKind & InstrProfKind::IR))
+ OS << "# IR level Instrumentation Flag\n:ir\n";
+
+ if (static_cast<bool>(ProfileKind & InstrProfKind::BB))
OS << "# Always instrument the function entry block\n:entry_first\n";
InstrProfSymtab Symtab;
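
Annotation: writeText checks the CS bit before the IR bit because, at the text level, ":csir" subsumes ":ir" — and readHeader's "csir" branch earlier in this diff restores both bits, so the round-trip is lossless. With the CS and BB flags set, the emitted header is exactly the strings shown in the code above:

# CSIR level Instrumentation Flag
:csir
# Always instrument the function entry block
:entry_first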
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp
index 3d586a247962..d74fff4ca7c5 100644
--- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp
+++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/Remarks/BitstreamRemarkParser.h"
#include "BitstreamRemarkParser.h"
+#include "llvm/Remarks/Remark.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h
index 0e40e5d66e00..988bc30da6e1 100644
--- a/llvm/lib/Remarks/BitstreamRemarkParser.h
+++ b/llvm/lib/Remarks/BitstreamRemarkParser.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Remarks/BitstreamRemarkContainer.h"
#include "llvm/Remarks/BitstreamRemarkParser.h"
-#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkParser.h"
#include <cstdint>
@@ -24,6 +23,9 @@
namespace llvm {
namespace remarks {
+
+struct Remark;
+
/// Parses and holds the state of the latest parsed remark.
struct BitstreamRemarkParser : public RemarkParser {
/// The buffer to parse.
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index dd1bba3d1762..62f80918ea1d 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -12,10 +12,12 @@
#include "llvm/Remarks/RemarkLinker.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Remarks/BitstreamRemarkContainer.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index c5c3d0badd3e..f36767efcbf4 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -15,7 +15,6 @@
#include "BitstreamRemarkParser.h"
#include "YAMLRemarkParser.h"
#include "llvm-c/Remarks.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CBindingWrapping.h"
using namespace llvm;
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
index df3b908f4779..88b3003010d3 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
@@ -14,14 +14,12 @@
#define LLVM_REMARKS_YAML_REMARK_PARSER_H
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 827e04f0b10f..9e965aa4f6c4 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Remarks/YAMLRemarkSerializer.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 908e56319025..9ba224cee0ca 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ARMAttributeParser.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ScopedPrinter.h"
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 9a4470289bcf..f6003b783245 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -211,6 +211,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd0d", "cortex-a77")
.Case("0xd41", "cortex-a78")
.Case("0xd44", "cortex-x1")
+ .Case("0xd4c", "cortex-x1c")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd40", "neoverse-v1")
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 6c59d8a7ef04..2b3395b669b8 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -461,15 +461,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
ISAInfo->Exts.erase(ExtName.str());
}
- ISAInfo->updateImplication();
- ISAInfo->updateFLen();
- ISAInfo->updateMinVLen();
- ISAInfo->updateMaxELen();
-
- if (Error Result = ISAInfo->checkDependency())
- return std::move(Result);
-
- return std::move(ISAInfo);
+ return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
}
llvm::Expected<std::unique_ptr<RISCVISAInfo>>
@@ -686,26 +678,18 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
}
}
- ISAInfo->updateImplication();
- ISAInfo->updateFLen();
- ISAInfo->updateMinVLen();
- ISAInfo->updateMaxELen();
-
- if (Error Result = ISAInfo->checkDependency())
- return std::move(Result);
-
- return std::move(ISAInfo);
+ return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
}
Error RISCVISAInfo::checkDependency() {
bool IsRv32 = XLen == 32;
- bool HasE = Exts.count("e") == 1;
- bool HasD = Exts.count("d") == 1;
- bool HasF = Exts.count("f") == 1;
- bool HasZve32x = Exts.count("zve32x") == 1;
- bool HasZve32f = Exts.count("zve32f") == 1;
- bool HasZve64d = Exts.count("zve64d") == 1;
- bool HasV = Exts.count("v") == 1;
+ bool HasE = Exts.count("e") != 0;
+ bool HasD = Exts.count("d") != 0;
+ bool HasF = Exts.count("f") != 0;
+ bool HasZve32x = Exts.count("zve32x") != 0;
+ bool HasZve32f = Exts.count("zve32f") != 0;
+ bool HasZve64d = Exts.count("zve64d") != 0;
+ bool HasV = Exts.count("v") != 0;
bool HasVector = HasZve32x || HasV;
bool HasZvl = MinVLen != 0;
@@ -739,12 +723,6 @@ Error RISCVISAInfo::checkDependency() {
errc::invalid_argument,
"zvl*b requires v or zve* extension to also be specified");
- // Could not implement Zve* extension and the V extension at the same time.
- if (HasZve32x && HasV)
- return createStringError(
- errc::invalid_argument,
- "It is illegal to specify the v extension with zve* extensions");
-
// Additional dependency checks.
// TODO: The 'q' extension requires rv64.
// TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
@@ -753,7 +731,8 @@ Error RISCVISAInfo::checkDependency() {
}
static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"};
-static const char *ImpliedExtsZfh[] = {"zfhmin"};
+static const char *ImpliedExtsZfhmin[] = {"f"};
+static const char *ImpliedExtsZfh[] = {"f"};
static const char *ImpliedExtsZve64d[] = {"zve64f"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
@@ -785,9 +764,11 @@ struct ImpliedExtsEntry {
bool operator<(StringRef Other) const { return Name < Other; }
};
+// Note: The table needs to be sorted by name.
static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"v"}, {ImpliedExtsV}},
{{"zfh"}, {ImpliedExtsZfh}},
+ {{"zfhmin"}, {ImpliedExtsZfhmin}},
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
@@ -810,8 +791,8 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
};
void RISCVISAInfo::updateImplication() {
- bool HasE = Exts.count("e") == 1;
- bool HasI = Exts.count("i") == 1;
+ bool HasE = Exts.count("e") != 0;
+ bool HasI = Exts.count("i") != 0;
// If not in e extension and i extension does not exist, i extension is
// implied
@@ -919,3 +900,15 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const {
}
return FeatureVector;
}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) {
+ ISAInfo->updateImplication();
+ ISAInfo->updateFLen();
+ ISAInfo->updateMinVLen();
+ ISAInfo->updateMaxELen();
+
+ if (Error Result = ISAInfo->checkDependency())
+ return std::move(Result);
+ return std::move(ISAInfo);
+}
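
Annotation: both parse entry points now funnel through postProcessAndChecking, so the implication/FLen/MinVLen/MaxELen updates and the dependency check can no longer drift apart between parseFeatures and parseArchString. Callers consume the Expected result in the usual LLVM way — a sketch, assuming the two-argument parseArchString shown in this diff:

#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"

void demo() {
  auto ParseResult = llvm::RISCVISAInfo::parseArchString(
      "rv64imafdc", /*EnableExperimentalExtension=*/false);
  if (!ParseResult) {
    // Error path: every Expected must be checked or taken.
    llvm::logAllUnhandledErrors(ParseResult.takeError(), llvm::errs(),
                                "riscv: ");
    return;
  }
  std::unique_ptr<llvm::RISCVISAInfo> ISAInfo = std::move(*ParseResult);
}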
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index 5ce41c987029..1d61f2bf7525 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -15,7 +15,7 @@
#include "DebugOptions.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 20dea8c302a5..a9afcc9db96a 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -7,14 +7,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/VersionTuple.h"
#include <cassert>
#include <cstring>
diff --git a/llvm/lib/Support/Valgrind.cpp b/llvm/lib/Support/Valgrind.cpp
index 3cf41faeb55d..5994656c5c03 100644
--- a/llvm/lib/Support/Valgrind.cpp
+++ b/llvm/lib/Support/Valgrind.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include <stddef.h>
#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
-#include <cstddef>
#if HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
diff --git a/llvm/lib/Support/Windows/Host.inc b/llvm/lib/Support/Windows/Host.inc
index 5583db909045..fa6b00f19b9a 100644
--- a/llvm/lib/Support/Windows/Host.inc
+++ b/llvm/lib/Support/Windows/Host.inc
@@ -10,6 +10,9 @@
//
//===----------------------------------------------------------------------===//
+// We need to include config.h here because LLVM_DEFAULT_TARGET_TRIPLE is not
+// defined in llvm-config.h if it is unset.
+#include "llvm/Config/config.h"
#include "llvm/Support/Windows/WindowsSupport.h"
#include <cstdio>
#include <string>
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index e4b747b68bea..69d4fe96bee8 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLArrayExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
diff --git a/llvm/lib/TableGen/DetailedRecordsBackend.cpp b/llvm/lib/TableGen/DetailedRecordsBackend.cpp
index e181f79b903d..500aa4c78225 100644
--- a/llvm/lib/TableGen/DetailedRecordsBackend.cpp
+++ b/llvm/lib/TableGen/DetailedRecordsBackend.cpp
@@ -13,15 +13,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+#include <memory>
#include <string>
#include <utility>
diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp
index 8ddfd9f04524..e38903910275 100644
--- a/llvm/lib/TableGen/JSONBackend.cpp
+++ b/llvm/lib/TableGen/JSONBackend.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
+#include "llvm/TableGen/Record.h"
#define DEBUG_TYPE "json-emitter"
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 762255b43136..1d5f130737ee 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -16,7 +16,6 @@
#include "llvm/TableGen/Main.h"
#include "TGParser.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -24,7 +23,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
-#include <cstdio>
#include <system_error>
using namespace llvm;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index eb7d4838a9f6..58d8c9936896 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -10,16 +10,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/Record.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -29,11 +28,10 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
#include <cassert>
#include <cstdint>
-#include <memory>
#include <map>
+#include <memory>
#include <string>
#include <utility>
#include <vector>
@@ -2289,8 +2287,8 @@ bool RecordVal::setValue(Init *V, SMLoc NewLoc) {
return false;
}
-#include "llvm/TableGen/Record.h"
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#include "llvm/TableGen/Record.h"
LLVM_DUMP_METHOD void RecordVal::dump() const { errs() << *this; }
#endif
diff --git a/llvm/lib/TableGen/SetTheory.cpp b/llvm/lib/TableGen/SetTheory.cpp
index f7ba75243c15..3db46aae6d96 100644
--- a/llvm/lib/TableGen/SetTheory.cpp
+++ b/llvm/lib/TableGen/SetTheory.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/TableGen/SetTheory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
@@ -21,7 +21,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/SetTheory.h"
#include <algorithm>
#include <cstdint>
#include <string>
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 3709a375ed1b..90646a0c642d 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -21,7 +21,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/SourceMgr.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
index 4ce88e003e65..0ba00c8d8ab1 100644
--- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
+++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -10,22 +10,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
-#include <set>
-#include <string>
-#include <vector>
#define DEBUG_TYPE "skeleton-emitter"
+namespace llvm {
+class RecordKeeper;
+class raw_ostream;
+} // namespace llvm
+
using namespace llvm;
namespace {
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index b87468d5c8de..9a04b28a8b8f 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -972,6 +972,10 @@ def ProcessorFeatures {
list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureRCPC, FeaturePerfMon,
FeatureSPE, FeatureFullFP16, FeatureDotProd];
+ list<SubtargetFeature> X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureRCPC, FeaturePerfMon,
+ FeatureSPE, FeatureFullFP16, FeatureDotProd,
+ FeaturePAuth];
list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
@@ -1086,6 +1090,8 @@ def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
[TuneX1]>;
+def : ProcessorModel<"cortex-x1c", CortexA57Model, ProcessorFeatures.X1C,
+ [TuneX1]>;
def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 85a9c04a3fef..b54a0eaba7d1 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -95,6 +95,8 @@ public:
void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);
+ void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -936,6 +938,43 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
.addImm(Size == 4 ? 0 : 2));
}
+void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ assert(STI->hasMOPS());
+ assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo);
+
+ const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
+ if (Opcode == AArch64::MOPSMemoryCopyPseudo)
+ return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE};
+ if (Opcode == AArch64::MOPSMemoryMovePseudo)
+ return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE};
+ if (Opcode == AArch64::MOPSMemorySetPseudo)
+ return {AArch64::SETP, AArch64::SETM, AArch64::SETE};
+ if (Opcode == AArch64::MOPSMemorySetTaggingPseudo)
+ return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE};
+ llvm_unreachable("Unhandled memory operation pseudo");
+ }();
+ const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo ||
+ Opcode == AArch64::MOPSMemorySetTaggingPseudo;
+
+ for (auto Op : Ops) {
+ int i = 0;
+ auto MCIB = MCInstBuilder(Op);
+ // Destination registers
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ if (!IsSet)
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ // Input registers
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+ MCIB.addReg(MI.getOperand(i++).getReg());
+
+ EmitToStreamer(OutStreamer, MCIB);
+ }
+}
+
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
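
Annotation: LowerMOPS expands each memory-operation pseudo into its three-instruction sequence (e.g. CPYFP/CPYFM/CPYFE), re-reading the pseudo's operand list once per instruction since all three take the same operands. The instruction triple itself is chosen with an immediately invoked lambda, which keeps Ops const despite the branchy selection; a self-contained sketch of that pattern with placeholder values:

#include <array>

static std::array<unsigned, 3> pickTriple(unsigned Opcode) {
  const auto Ops = [Opcode]() -> std::array<unsigned, 3> {
    if (Opcode == 0) // stand-in for MOPSMemoryCopyPseudo etc.
      return {1, 2, 3};
    return {4, 5, 6};
  }(); // trailing (): the lambda runs immediately, initializing a const
  return Ops;
}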
@@ -1363,6 +1402,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
emitFMov0(*MI);
return;
+ case AArch64::MOPSMemoryCopyPseudo:
+ case AArch64::MOPSMemoryMovePseudo:
+ case AArch64::MOPSMemorySetPseudo:
+ case AArch64::MOPSMemorySetTaggingPseudo:
+ LowerMOPS(*OutStreamer, *MI);
+ return;
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*OutStreamer, SM, *MI);
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 109b739528bf..b0f739cc26e6 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -709,20 +709,24 @@ bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
- // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
- // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
- // in between.
+ // Expand CALL_RVMARKER pseudo to:
+ // - a branch to the call target, followed by
+ // - the special `mov x29, x29` marker, and
+ // - another branch, to the runtime function
+  // Mark the sequence as a bundle, to avoid passes moving other code in
+  // between.
MachineInstr &MI = *MBBI;
MachineInstr *OriginalCall;
- MachineOperand &CallTarget = MI.getOperand(0);
+ MachineOperand &RVTarget = MI.getOperand(0);
+ MachineOperand &CallTarget = MI.getOperand(1);
assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
"invalid operand for regular call");
+ assert(RVTarget.isGlobal() && "invalid operand for attached call");
unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
OriginalCall->addOperand(CallTarget);
- unsigned RegMaskStartIdx = 1;
+ unsigned RegMaskStartIdx = 2;
// Skip register arguments. Those are added during ISel, but are not
// needed for the concrete branch.
while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
@@ -736,17 +740,22 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
llvm::drop_begin(MI.operands(), RegMaskStartIdx))
OriginalCall->addOperand(MO);
- auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
.addReg(AArch64::FP, RegState::Define)
.addReg(AArch64::XZR)
.addReg(AArch64::FP)
- .addImm(0)
+ .addImm(0);
+
+ auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
+ .add(RVTarget)
.getInstr();
+
if (MI.shouldUpdateCallSiteInfo())
- MBB.getParent()->moveCallSiteInfo(&MI, Marker);
+ MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
+
MI.eraseFromParent();
finalizeBundle(MBB, OriginalCall->getIterator(),
- std::next(Marker->getIterator()));
+ std::next(RVCall->getIterator()));
return true;
}
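
Annotation: the pseudo now carries the objc runtime function as operand 0 (RVTarget), matching the Verifier change earlier in this diff that makes the bundle's function argument mandatory. The expansion therefore emits three instructions instead of two and bundles them all; a comment-only sketch of the resulting sequence, with a hypothetical call target:

// After expansion the bundle contains, in order (sketch):
//   bl _foo                  ; OriginalCall, the user's call
//   mov x29, x29             ; ORRXrs marker the ObjC runtime keys on
//   bl _objc_retain...Value  ; RVTarget, now an explicit branch
// finalizeBundle(MBB, OriginalCall->getIterator(),
//                std::next(RVCall->getIterator()))
// glues the three together so later passes cannot schedule code between
// the call, the marker, and the runtime call.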
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a26bbc77f248..c539c8617d99 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
- MaxStoresPerMemset = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemsetOptSize : 32;
+ MaxStoresPerMemset =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
- MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemcpyOptSize : 16;
+ MaxStoresPerMemcpy =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
- MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+ MaxStoresPerMemmoveOptSize = 4;
+ MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
- MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
- ? MaxLoadsPerMemcmpOptSize : 8;
+ MaxLoadsPerMemcmp =
+ Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
@@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
+ if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
+ // Only required for llvm.aarch64.mops.memset.tag
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
@@ -2201,7 +2208,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
- MAKE_CASE(AArch64ISD::PFALSE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -2268,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
+ MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
+ MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
}
#undef MAKE_CASE
return nullptr;
@@ -3746,6 +3756,10 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
+ // Bitcasts between f16 and bf16 are legal.
+ if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
+ return Op;
+
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
@@ -4056,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_mops_memset_tag: {
+ auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ SDValue Chain = Node->getChain();
+ SDValue Dst = Op.getOperand(2);
+ SDValue Val = Op.getOperand(3);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+ SDValue Size = Op.getOperand(4);
+ auto Alignment = Node->getMemOperand()->getAlign();
+ bool IsVol = Node->isVolatile();
+ auto DstPtrInfo = Node->getPointerInfo();
+
+ const auto &SDI =
+ static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
+ SDValue MS =
+ SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
+ Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
+
+ // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
+ // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
+ // LowerOperationWrapper will complain that the number of results has
+ // changed.
+ return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
+ }
+ }
+}
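+
+// As a usage sketch, assuming the intrinsic takes a destination pointer, an
+// i8 value, and an i64 size and returns the updated destination (which
+// matches how the operands are unpacked above), the IR being custom-lowered
+// here looks like:
+//
+//   %dst.new = call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %val, i64 %size)
+//
+// The returned pointer corresponds to MS.getValue(0) (the Dst writeback); the
+// Size writeback has no IR-level counterpart, hence the MERGE_VALUES above.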
+
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -5123,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
+ case ISD::INTRINSIC_W_CHAIN:
+ return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
@@ -6475,12 +6524,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
- // be expanded to the call, directly followed by a special marker sequence.
- // Use the CALL_RVMARKER to do that.
+ // be expanded to the call, directly followed by a special marker sequence and
+ // a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
+
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
}
// Returns a chain and a flag for retval copy to use.
@@ -9985,8 +10040,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ // We can handle the zero case during isel.
if (ConstVal->isZero())
- return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
+ return Op;
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
}
@@ -11869,6 +11925,19 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ Value *Dst = I.getArgOperand(0);
+ Value *Val = I.getArgOperand(1);
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(Val->getType());
+ Info.ptrVal = Dst;
+ Info.offset = 0;
+ Info.align = I.getParamAlign(0).valueOrOne();
+ Info.flags = MachineMemOperand::MOStore;
+ // The size of the memory being operated on is unknown at this point
+ Info.size = MemoryLocation::UnknownSize;
+ return true;
+ }
default:
break;
}
@@ -15092,7 +15161,7 @@ static bool isAllInactivePredicate(SDValue N) {
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
- return N.getOpcode() == AArch64ISD::PFALSE;
+ return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
@@ -15393,6 +15462,52 @@ static SDValue performIntrinsicCombine(SDNode *N,
return SDValue();
}
+static bool isCheapToExtend(const SDValue &N) {
+ unsigned OC = N->getOpcode();
+ return OC == ISD::LOAD || OC == ISD::MLOAD ||
+ ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static SDValue
+performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // If we have (sext (setcc A B)) and A and B are cheap to extend,
+ // we can move the sext into the arguments and have the same result. For
+ // example, if A and B are both loads, we can make those extending loads and
+ // avoid an extra instruction. This pattern appears often in VLS code
+ // generation where the inputs to the setcc have a different size to the
+ // instruction that wants to use the result of the setcc.
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC);
+ const SDValue SetCC = N->getOperand(0);
+
+ const SDValue CCOp0 = SetCC.getOperand(0);
+ const SDValue CCOp1 = SetCC.getOperand(1);
+ if (!CCOp0->getValueType(0).isInteger() ||
+ !CCOp1->getValueType(0).isInteger())
+ return SDValue();
+
+ ISD::CondCode Code =
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
+
+ ISD::NodeType ExtType =
+ isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if (isCheapToExtend(SetCC.getOperand(0)) &&
+ isCheapToExtend(SetCC.getOperand(1))) {
+ const SDValue Ext1 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
+ const SDValue Ext2 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
+
+ return DAG.getSetCC(
+ SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
+ }
+
+ return SDValue();
+}
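+
+// In DAG terms, the rewrite performed here is (a sketch):
+//
+//   (sign_extend (setcc A, B, cc))  -->  (setcc (ext A), (ext B), cc)
+//
+// where ext is sign_extend for signed condition codes and zero_extend
+// otherwise, so that when A and B are loads the extends can later fold into
+// extending loads.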
+
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -15411,6 +15526,12 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
+
+ if (N->getValueType(0).isFixedLengthVector() &&
+ N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC)
+ return performSignExtendSetCCCombine(N, DCI, DAG);
+
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index ca6c70297c0b..2138c0ffe70a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -323,7 +323,6 @@ enum NodeType : unsigned {
INSR,
PTEST,
PTRUE,
- PFALSE,
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
@@ -453,6 +452,12 @@ enum NodeType : unsigned {
LDP,
STP,
STNP,
+
+ // Memory Operations
+ MOPS_MEMSET,
+ MOPS_MEMSET_TAGGING,
+ MOPS_MEMCOPY,
+ MOPS_MEMMOVE,
};
} // end namespace AArch64ISD
@@ -890,6 +895,7 @@ private:
SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool isEligibleForTailCallOptimization(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 93c17133c845..a9191924129c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -93,9 +93,18 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// before the assembly printer.
unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
+
+ // The size should preferably be set in
+ // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
+ // The cases below handle instructions of variable size.
switch (Desc.getOpcode()) {
default:
- // Anything not explicitly designated otherwise is a normal 4-byte insn.
+ if (Desc.getSize())
+ return Desc.getSize();
+
+ // Anything not explicitly designated otherwise (i.e. pseudo-instructions
+ // with a fixed constant size but not specified in the .td file) is a normal
+ // 4-byte insn.
NumBytes = 4;
break;
case TargetOpcode::STACKMAP:
@@ -115,29 +124,9 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (NumBytes == 0)
NumBytes = 4;
break;
- case AArch64::TLSDESC_CALLSEQ:
- // This gets lowered to an instruction sequence which takes 16 bytes
- NumBytes = 16;
- break;
- case AArch64::SpeculationBarrierISBDSBEndBB:
- // This gets lowered to 2 4-byte instructions.
- NumBytes = 8;
- break;
- case AArch64::SpeculationBarrierSBEndBB:
- // This gets lowered to 1 4-byte instructions.
- NumBytes = 4;
- break;
- case AArch64::JumpTableDest32:
- case AArch64::JumpTableDest16:
- case AArch64::JumpTableDest8:
- NumBytes = 12;
- break;
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
- case AArch64::StoreSwiftAsyncContext:
- NumBytes = 20;
- break;
case TargetOpcode::BUNDLE:
NumBytes = getInstBundleLength(MI);
break;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c8a697c8b82f..83bf89ff97c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -780,6 +780,7 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
@@ -815,8 +816,12 @@ let hasSideEffects = 1, isCodeGenOnly = 1 in {
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions.
+ let Size = 8 in
def SpeculationBarrierISBDSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
+ // This gets lowered to a 4-byte instruction.
+ let Size = 4 in
def SpeculationBarrierSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
}
@@ -2324,8 +2329,8 @@ def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
-def : Pat<(AArch64call_rvmarker GPR64:$Rn),
- (BLR_RVMARKER GPR64:$Rn)>,
+def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
+ (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -2356,7 +2361,8 @@ def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
-let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+// This gets lowered to an instruction sequence which takes 16 bytes
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
@@ -7546,6 +7552,9 @@ def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
+
let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -8330,26 +8339,67 @@ let Predicates = [HasLS64] in {
}
let Predicates = [HasMOPS] in {
- defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
- defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
- defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
+ let Defs = [NZCV] in {
+ defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
+
+ defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
+
+ defm SETP : MOPSMemorySetInsns<0b00, "setp">;
+ }
+ let Uses = [NZCV] in {
+ defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
+ defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
- defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
- defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
- defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
+ defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
+ defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
- defm SETP : MOPSMemorySetInsns<0b00, "setp">;
- defm SETM : MOPSMemorySetInsns<0b01, "setm">;
- defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+ defm SETM : MOPSMemorySetInsns<0b01, "setm">;
+ defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+ }
}
let Predicates = [HasMOPS, HasMTE] in {
- defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
- defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
- // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
- defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+ let Defs = [NZCV] in {
+ defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
+ }
+ let Uses = [NZCV] in {
+ defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
+ // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
+ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+ }
+}
+
+// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
+// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
+def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
+def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
+def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
+def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
+def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
+
+// MOPS operations always contain three 4-byte instructions
+let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
+ let mayLoad = 1 in {
+ def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+ [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+ def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+ [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
+ }
+ let mayLoad = 0 in {
+ def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+ [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
+ }
+}
+let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
+ def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+ [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
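
Each pseudo above is later expanded (see LowerMOPS in the AsmPrinter change earlier in this patch) into the corresponding prologue/main/epilogue triple; e.g. the copy pseudo becomes, roughly (register numbers illustrative):

    cpyfp [x0]!, [x1]!, x2!   // prologue: sets NZCV
    cpyfm [x0]!, [x1]!, x2!   // main: reads NZCV
    cpyfe [x0]!, [x1]!, x2!   // epilogue: reads NZCV

which is why Size = 12 holds for all of them.
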
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
+// This gets lowered into an instruction sequence of 20 bytes
+let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 73a680465f6f..1d162610de9c 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -292,7 +292,13 @@ def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
]>;
-def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
+def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
+
+def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
+ [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))),
+ (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))),
+ (and node:$op1, (xor node:$op2, (SVEAllActive))),
+ (AArch64bic_node node:$op1, node:$op2)]>;
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -734,14 +740,14 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
- defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
- defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
+ defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>;
+ defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
- defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
+ defm SEL_PPPP : sve_int_pred_log_v2<0b0011, "sel", vselect, or>;
defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
- defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
+ defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index d2d84b2a3f6d..893269c1a7ef 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -15,15 +15,95 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
+SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode,
+ SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Dst,
+ SDValue SrcOrValue, SDValue Size,
+ Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+
+ // Get the constant size of the copy/set.
+ uint64_t ConstSize = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(Size))
+ ConstSize = C->getZExtValue();
+
+ const bool IsSet = SDOpcode == AArch64ISD::MOPS_MEMSET ||
+ SDOpcode == AArch64ISD::MOPS_MEMSET_TAGGING;
+
+ const auto MachineOpcode = [&]() {
+ switch (SDOpcode) {
+ case AArch64ISD::MOPS_MEMSET:
+ return AArch64::MOPSMemorySetPseudo;
+ case AArch64ISD::MOPS_MEMSET_TAGGING:
+ return AArch64::MOPSMemorySetTaggingPseudo;
+ case AArch64ISD::MOPS_MEMCOPY:
+ return AArch64::MOPSMemoryCopyPseudo;
+ case AArch64ISD::MOPS_MEMMOVE:
+ return AArch64::MOPSMemoryMovePseudo;
+ default:
+ llvm_unreachable("Unhandled MOPS ISD Opcode");
+ }
+ }();
+
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOStore;
+ if (isVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (!IsSet)
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ auto *DstOp =
+ MF.getMachineMemOperand(DstPtrInfo, Flags, ConstSize, Alignment);
+ auto *SrcOp =
+ MF.getMachineMemOperand(SrcPtrInfo, Flags, ConstSize, Alignment);
+
+ if (IsSet) {
+ // Extend value to i64 if required
+ if (SrcOrValue.getValueType() != MVT::i64)
+ SrcOrValue = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, SrcOrValue);
+ SDValue Ops[] = {Dst, Size, SrcOrValue, Chain};
+ const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::Other};
+ MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+ DAG.setNodeMemRefs(Node, {DstOp});
+ return SDValue(Node, 2);
+ } else {
+ SDValue Ops[] = {Dst, SrcOrValue, Size, Chain};
+ const EVT ResultTys[] = {MVT::i64, MVT::i64, MVT::i64, MVT::Other};
+ MachineSDNode *Node = DAG.getMachineNode(MachineOpcode, DL, ResultTys, Ops);
+ DAG.setNodeMemRefs(Node, {DstOp, SrcOp});
+ return SDValue(Node, 3);
+ }
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS())
+ return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+ return SDValue();
+}
+
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+
+ if (STI.hasMOPS()) {
+ return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
+ }
+
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
- const AArch64Subtarget &STI =
- DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
const char *bzeroName =
(V && V->isZero())
? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
@@ -55,6 +135,19 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
return SDValue();
}
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ const AArch64Subtarget &STI =
+ DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
+ if (STI.hasMOPS()) {
+ return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size,
+ Alignment, isVolatile, DstPtrInfo, SrcPtrInfo);
+ }
+ return SDValue();
+}
+
static const int kSetTagLoopThreshold = 176;
static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 7d53bd456975..47fe3bf7dcf5 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,11 +19,30 @@ namespace llvm {
class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
public:
+ SDValue EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG,
+ const SDLoc &DL, SDValue Chain, SDValue Dst,
+ SDValue SrcOrValue, SDValue Size, Align Alignment,
+ bool isVolatile, MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, Align Alignment,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
+ SDValue
+ EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Align Alignment, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+
SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
MachinePointerInfo DstPtrInfo,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index a4f4b8582182..8a7e20237271 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -99,6 +99,7 @@ void AArch64Subtarget::initializeProperties() {
case CortexA78C:
case CortexR82:
case CortexX1:
+ case CortexX1C:
PrefFunctionLogAlignment = 4;
break;
case CortexA510:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 3e3c0f6aba15..7b2bbad30f85 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -63,6 +63,7 @@ public:
CortexA710,
CortexR82,
CortexX1,
+ CortexX1C,
CortexX2,
ExynosM3,
Falkor,
@@ -217,7 +218,6 @@ protected:
bool HasETE = false;
bool HasTRBE = false;
bool HasBRBE = false;
- bool HasPAUTH = false;
bool HasSPE_EEF = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
@@ -510,7 +510,6 @@ public:
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
- bool hasPAUTH() const { return HasPAUTH; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4d666a0a3c2..b2ffdf949d8b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1886,14 +1886,21 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
m_Value())))
VecPred = CurrentPred;
}
- // Check if we have a compare/select chain that can be lowered using CMxx &
- // BFI pair.
- if (CmpInst::isIntPredicate(VecPred)) {
- static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
- MVT::v8i16, MVT::v2i32, MVT::v4i32,
- MVT::v2i64};
+ // Check if we have a compare/select chain that can be lowered using
+ // a (F)CMxx & BFI pair.
+ if (CmpInst::isIntPredicate(VecPred) || VecPred == CmpInst::FCMP_OLE ||
+ VecPred == CmpInst::FCMP_OLT || VecPred == CmpInst::FCMP_OGT ||
+ VecPred == CmpInst::FCMP_OGE || VecPred == CmpInst::FCMP_OEQ ||
+ VecPred == CmpInst::FCMP_UNE) {
+ static const auto ValidMinMaxTys = {
+ MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
+ static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
+
auto LT = TLI->getTypeLegalizationCost(DL, ValTy);
- if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
+ if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
+ (ST->hasFullFP16() &&
+ any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
return LT.first;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1f546ad50d57..703e356f016d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -192,6 +192,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
@@ -3424,6 +3425,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMCPY_INLINE:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
+ return selectMOPS(I, MRI);
}
return false;
@@ -3481,6 +3488,64 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
return false;
}
+bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
+ MachineRegisterInfo &MRI) {
+ unsigned Mopcode;
+ switch (GI.getOpcode()) {
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMCPY_INLINE:
+ Mopcode = AArch64::MOPSMemoryCopyPseudo;
+ break;
+ case TargetOpcode::G_MEMMOVE:
+ Mopcode = AArch64::MOPSMemoryMovePseudo;
+ break;
+ case TargetOpcode::G_MEMSET:
+ // For tagged memset see llvm.aarch64.mops.memset.tag
+ Mopcode = AArch64::MOPSMemorySetPseudo;
+ break;
+ }
+
+ auto &DstPtr = GI.getOperand(0);
+ auto &SrcOrVal = GI.getOperand(1);
+ auto &Size = GI.getOperand(2);
+
+ // Create copies of the registers that can be clobbered.
+ const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
+ const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
+ const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
+
+ const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
+ const auto &SrcValRegClass =
+ IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
+
+ // Constrain to specific registers
+ RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
+ RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
+ RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
+
+ MIB.buildCopy(DstPtrCopy, DstPtr);
+ MIB.buildCopy(SrcValCopy, SrcOrVal);
+ MIB.buildCopy(SizeCopy, Size);
+
+ // The new instruction uses the copied registers because it must update them.
+ // The defs are not used since they don't exist in G_MEM*, but they are
+ // still tied.
+ // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
+ Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
+ Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ if (IsSet) {
+ MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
+ {DstPtrCopy, SizeCopy, SrcValCopy});
+ } else {
+ Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
+ MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
+ {DstPtrCopy, SrcValCopy, SizeCopy});
+ }
+
+ GI.eraseFromParent();
+ return true;
+}
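+
+// A rough before/after sketch in MIR for the set case (virtual register
+// names are illustrative):
+//
+//   ; before
+//   G_MEMSET %dst(p0), %val(s64), %size(s64), 0
+//   ; after
+//   %d:gpr64common = COPY %dst
+//   %v:gpr64       = COPY %val
+//   %s:gpr64       = COPY %size
+//   %d_wb:gpr64common, %s_wb:gpr64 = MOPSMemorySetPseudo %d, %s, %v
+//
+// Note the size/value swap on the pseudo, matching the comment above.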
+
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
@@ -5375,6 +5440,36 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
break;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ // Transform
+ // %dst:gpr(p0) = \
+ // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
+ // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
+ // where %dst is updated, into
+ // %Rd:GPR64common, %Rn:GPR64 = \
+ // MOPSMemorySetTaggingPseudo \
+ // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
+ // where Rd and Rn are tied.
+ // It is expected that %val has been extended to s64 in legalization.
+ // Note that the order of the size/value operands is swapped.
+
+ Register DstDef = I.getOperand(0).getReg();
+ // I.getOperand(1) is the intrinsic function
+ Register DstUse = I.getOperand(2).getReg();
+ Register ValUse = I.getOperand(3).getReg();
+ Register SizeUse = I.getOperand(4).getReg();
+
+ // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
+ // Therefore an additional virtual register is requried for the updated size
+ // operand. This value is not accessible via the semantics of the intrinsic.
+ Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+ auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
+ {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
+ Memset.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
+ break;
+ }
}
I.eraseFromParent();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e8894e7933d6..e9df7e001d38 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -699,8 +699,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
- getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
- .libcall();
+ if (ST.hasMOPS()) {
+ // G_BZERO is not supported. Currently it is only emitted by
+ // the PreLegalizerCombiner for G_MEMSET with a zero constant.
+ getActionDefinitionsBuilder(G_BZERO).unsupported();
+
+ getActionDefinitionsBuilder(G_MEMSET)
+ .legalForCartesianProduct({p0}, {s64}, {s64})
+ .customForCartesianProduct({p0}, {s8}, {s64})
+ .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
+ .legalForCartesianProduct({p0}, {p0}, {s64})
+ .immIdx(0); // Inform verifier imm idx 0 is handled.
+
+ // G_MEMCPY_INLINE does not have a tailcall immediate
+ getActionDefinitionsBuilder(G_MEMCPY_INLINE)
+ .legalForCartesianProduct({p0}, {p0}, {s64});
+
+ } else {
+ getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
+ .libcall();
+ }
// FIXME: Legal types are only legal with NEON.
getActionDefinitionsBuilder(G_ABS)
@@ -832,6 +852,11 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeAtomicCmpxchg128(MI, MRI, Helper);
case TargetOpcode::G_CTTZ:
return legalizeCTTZ(MI, Helper);
+ case TargetOpcode::G_BZERO:
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET:
+ return legalizeMemOps(MI, Helper);
}
llvm_unreachable("expected switch to return");
@@ -989,6 +1014,15 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ // Widen the value to 64 bits with an any-extend
+ MachineIRBuilder MIB(MI);
+ auto &Value = MI.getOperand(3);
+ Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+ Value.setReg(ZExtValueReg);
+ return true;
+ }
}
return true;
@@ -1359,3 +1393,20 @@ bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+
+bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+
+ // The tagged version (llvm.aarch64.mops.memset.tag) is legalized in legalizeIntrinsic
+ if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
+ // Widen the value operand to 64 bits with an any-extend
+ auto &Value = MI.getOperand(1);
+ Register ZExtValueReg =
+ MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
+ Value.setReg(ZExtValueReg);
+ return true;
+ }
+
+ return false;
+}
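+
+// For example (a sketch; register names made up), a G_MEMSET whose value is
+// s8:
+//
+//   G_MEMSET %p(p0), %v(s8), %n(s64), 0
+//
+// is rewritten so the value operand becomes
+//
+//   %v64:_(s64) = G_ANYEXT %v(s8)
+//   G_MEMSET %p(p0), %v64(s64), %n(s64), 0
+//
+// which then matches the legalForCartesianProduct({p0}, {s64}, {s64}) rule
+// declared earlier.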
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index e2c46f4b4c1f..973f96ff4775 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -56,6 +56,7 @@ private:
bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 574b22124957..9d4bdbe5d053 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,8 +334,6 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
-def SDT_AArch64PFalse : SDTypeProfile<1, 0, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>]>;
-def AArch64pfalse : SDNode<"AArch64ISD::PFALSE", SDT_AArch64PFalse>;
let Predicates = [HasSVEorStreamingSVE] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
@@ -614,10 +612,10 @@ class sve_int_pfalse<bits<6> opc, string asm>
multiclass sve_int_pfalse<bits<6> opc, string asm> {
def NAME : sve_int_pfalse<opc, asm>;
- def : Pat<(nxv16i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv8i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv4i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
- def : Pat<(nxv2i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv16i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv8i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv4i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv2i1 (splat_vector (i32 0))), (!cast<Instruction>(NAME))>;
}
class sve_int_ptest<bits<6> opc, string asm>
@@ -773,7 +771,7 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm,
def : Pat<(i64 (op GPR64:$Rn, (nxv2i1 PPRAny:$Pg))),
(!cast<Instruction>(NAME # _D) PPRAny:$Pg, $Rn)>;
- // Combine cntp with combine_op
+ // combine_op(x, cntp(all_active, p)) ==> inst p, x
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred)))),
(!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>;
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred)))),
@@ -782,6 +780,16 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm,
(!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred)))),
(!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
+
+ // combine_op(x, cntp(p, p)) ==> inst p, x
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _B) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _H) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
+ def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))),
+ (!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
@@ -1633,15 +1641,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
-multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+// An instance of sve_int_pred_log that additionally matches op_nopred, using
+// op_nopred's first operand as the general predicate.
+multiclass sve_int_pred_log_v2<bits<4> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_nopred> :
sve_int_pred_log<opc, asm, op> {
- def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+ def : Pat<(nxv16i1 (op_nopred nxv16i1:$Op1, nxv16i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+ def : Pat<(nxv8i1 (op_nopred nxv8i1:$Op1, nxv8i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+ def : Pat<(nxv4i1 (op_nopred nxv4i1:$Op1, nxv4i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
- def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+ def : Pat<(nxv2i1 (op_nopred nxv2i1:$Op1, nxv2i1:$Op2)),
(!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 958e8c9e5bc5..11cc1a01d248 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 7d6845b287bc..bebf032b5535 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -14,9 +14,12 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "amdgpu-annotate-uniform"
@@ -29,6 +32,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
LegacyDivergenceAnalysis *DA;
MemorySSA *MSSA;
+ AliasAnalysis *AA;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isEntryFunc;
@@ -44,6 +48,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LegacyDivergenceAnalysis>();
AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesAll();
}
@@ -58,6 +63,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
@@ -70,9 +76,79 @@ static void setNoClobberMetadata(Instruction *I) {
I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
}
-bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
- const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
- return !MSSA->isLiveOnEntryDef(MA);
+bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
+ MemorySSAWalker *Walker = MSSA->getWalker();
+ SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+ SmallSet<MemoryAccess *, 8> Visited;
+ MemoryLocation Loc(MemoryLocation::get(Load));
+
+ const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
+ Instruction *DefInst = Def->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Def: " << *DefInst << '\n');
+
+ if (isa<FenceInst>(DefInst))
+ return false;
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_s_barrier:
+ case Intrinsic::amdgcn_wave_barrier:
+ return false;
+ default:
+ break;
+ }
+ }
+
+ // Ignore atomics that do not alias the original load; from MSSA's point
+ // of view any atomic is a universal MemoryDef too, just like a fence.
+ const auto checkNoAlias = [this, Load](auto I) -> bool {
+ return I && AA->isNoAlias(I->getPointerOperand(),
+ Load->getPointerOperand());
+ };
+
+ if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
+ checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
+ return false;
+
+ return true;
+ };
+
+ LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
+
+ // Start with the nearest dominating clobbering access: it will be either
+ // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
+ // a MemoryPhi if several MemoryDefs can define this memory state. In that
+ // case add all the Defs to the WorkList and keep walking up, checking all
+ // the definitions of this memory location until the root. If all the defs
+ // are exhausted and we have reached the entry state, there is no clobber.
+ // Along the scan, ignore barriers and fences, which MemorySSA considers
+ // clobbers even though they do not actually write anything into memory.
+ while (!WorkList.empty()) {
+ MemoryAccess *MA = WorkList.pop_back_val();
+ if (!Visited.insert(MA).second)
+ continue;
+
+ if (MSSA->isLiveOnEntryDef(MA))
+ continue;
+
+ if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
+ if (isReallyAClobber(Def)) {
+ LLVM_DEBUG(dbgs() << " -> load is clobbered\n");
+ return true;
+ }
+
+ WorkList.push_back(
+ Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+ continue;
+ }
+
+ const MemoryPhi *Phi = cast<MemoryPhi>(MA);
+ for (auto &Use : Phi->incoming_values())
+ WorkList.push_back(cast<MemoryAccess>(&Use));
+ }
+
+ LLVM_DEBUG(dbgs() << " -> no clobber\n");
+ return false;
}
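
Concretely, these are the kinds of MemoryDefs the walk now looks through (illustrative IR; only the atomic requires an AA no-alias proof against the load's pointer):

    fence syncscope("workgroup") release        ; ignored: fences write nothing
    call void @llvm.amdgcn.s.barrier()          ; ignored: barrier intrinsic
    %old = atomicrmw add i32* %q, i32 1 seq_cst ; ignored if noalias with %p
    %v = load i32, i32* %p                      ; the load being checked
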
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
@@ -84,9 +160,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
- auto isGlobalLoad = [&](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
- };
// We're tracking up to the Function boundaries, and cannot go beyond because
// of FunctionPass restrictions. We can ensure that is memory not clobbered
// for memory operations that are live in to entry points only.
@@ -99,7 +172,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool NotClobbered = false;
- bool GlobalLoad = isGlobalLoad(I);
+ bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
if (PtrI)
NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
@@ -139,6 +212,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
DA = &getAnalysis<LegacyDivergenceAnalysis>();
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
visit(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index bb2e723f4ab0..6e2984f2a04f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -88,6 +88,8 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV2());
} else if (isHsaAbiVersion3(getGlobalSTI())) {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV3());
+ } else if (isHsaAbiVersion5(getGlobalSTI())) {
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerV5());
} else {
HSAMetadataStream.reset(new HSAMD::MetadataStreamerV4());
}
@@ -118,7 +120,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
- if (isHsaAbiVersion3Or4(getGlobalSTI()))
+ if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
@@ -127,7 +129,7 @@ void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (isHsaAbiVersion3Or4(getGlobalSTI()))
+ if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
return;
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -259,7 +261,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- isHsaAbiVersion3Or4(getGlobalSTI())) {
+ isHsaAbiVersion3AndAbove(getGlobalSTI())) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 3ac7c45b3275..f5018e3a19ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
-void MetadataStreamerV3::emitKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
msgpack::MapDocNode Kern) {
+ auto &Func = MF.getFunction();
unsigned Offset = 0;
auto Args = HSAMetadataDoc->getArrayNode();
for (auto &Arg : Func.args())
emitKernelArg(Arg, Offset, Args);
- emitHiddenKernelArgs(Func, ST, Offset, Args);
+ emitHiddenKernelArgs(MF, Offset, Args);
Kern[".args"] = Args;
}
@@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg(
Args.push_back(Arg);
}
-void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
@@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
auto Kern = getHSAKernelProps(MF, ProgramInfo);
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(Func, Kern);
- emitKernelArgs(Func, ST, Kern);
+ emitKernelArgs(MF, Kern);
}
Kernels.push_back(Kern);
@@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod,
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV5
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV5::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
+ getRootMetadata("amdhsa.version") = Version;
+}
+
+void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
+ unsigned &Offset,
+ msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
+
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+ auto Int32Ty = Type::getInt32Ty(Func.getContext());
+ auto Int16Ty = Type::getInt16Ty(Func.getContext());
+
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
+
+ // Reserved for hidden_tool_correlation_id.
+ Offset += 8;
+
+ Offset += 8; // Reserved.
+
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
+
+ Offset += 6; // Reserved.
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ if (M->getNamedMetadata("llvm.printf.fmts")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ if (M->getModuleFlag("amdgpu_hostcall")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ Args);
+
+ // Ignore temporarily until it is implemented.
+ // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
+ Offset += 8;
+
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
+ Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
+ Args);
+ } else
+ Offset += 16; // Skipped.
+
+ Offset += 72; // Reserved.
+
+ // hidden_private_base and hidden_shared_base are only used by GFX8.
+ if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
+ } else
+ Offset += 8; // Skipped.
+
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI.hasQueuePtr())
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
+}
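+
+// A sketch of the resulting v5 metadata for the first hidden arguments
+// (offsets follow the emission order above; exact field rendering depends on
+// the msgpack-to-YAML dump):
+//
+//   amdhsa.version: [ <VersionMajorV5>, <VersionMinorV5> ]
+//   ...
+//   - .offset:     0
+//     .size:       4
+//     .value_kind: hidden_block_count_x
+//   - .offset:     4
+//     .size:       4
+//     .value_kind: hidden_block_count_y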
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 54ed0afbba6d..bcf7fc449094 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -53,6 +53,11 @@ public:
virtual void emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) = 0;
+
+protected:
+ virtual void emitVersion() = 0;
+ virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) = 0;
};
// TODO: Rename MetadataStreamerV3 -> MetadataStreamerMsgPackV3.
@@ -79,7 +84,7 @@ protected:
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
- void emitVersion();
+ void emitVersion() override;
void emitPrintf(const Module &Mod);
@@ -87,8 +92,7 @@ protected:
void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelArgs(const Function &Func, const GCNSubtarget &ST,
- msgpack::MapDocNode Kern);
+ void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
void emitKernelArg(const Argument &Arg, unsigned &Offset,
msgpack::ArrayDocNode Args);
@@ -100,8 +104,8 @@ protected:
StringRef BaseTypeName = "", StringRef AccQual = "",
StringRef TypeQual = "");
- void emitHiddenKernelArgs(const Function &Func, const GCNSubtarget &ST,
- unsigned &Offset, msgpack::ArrayDocNode Args);
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override;
msgpack::DocNode &getRootMetadata(StringRef Key) {
return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
@@ -127,9 +131,9 @@ public:
};
// TODO: Rename MetadataStreamerV4 -> MetadataStreamerMsgPackV4.
-class MetadataStreamerV4 final : public MetadataStreamerV3 {
- void emitVersion();
-
+class MetadataStreamerV4 : public MetadataStreamerV3 {
+protected:
+ void emitVersion() override;
void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
public:
@@ -140,6 +144,18 @@ public:
const IsaInfo::AMDGPUTargetID &TargetID) override;
};
+// TODO: Rename MetadataStreamerV5 -> MetadataStreamerMsgPackV5.
+class MetadataStreamerV5 final : public MetadataStreamerV4 {
+protected:
+ void emitVersion() override;
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override;
+
+public:
+ MetadataStreamerV5() = default;
+ ~MetadataStreamerV5() = default;
+};
+
// TODO: Rename MetadataStreamerV2 -> MetadataStreamerYamlV2.
class MetadataStreamerV2 final : public MetadataStreamer {
private:
@@ -167,8 +183,6 @@ private:
const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const;
- void emitVersion();
-
void emitPrintf(const Module &Mod);
void emitKernelLanguage(const Function &Func);
@@ -191,6 +205,13 @@ private:
return HSAMetadata;
}
+protected:
+ void emitVersion() override;
+ void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
+ msgpack::ArrayDocNode Args) override {
+ llvm_unreachable("Dummy override should not be invoked!");
+ }
+
public:
MetadataStreamerV2() = default;
~MetadataStreamerV2() = default;
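
Distilled into a standalone sketch, the header change above gives the streamers this shape (names shortened, all other members elided; not the real classes): emitVersion and emitHiddenKernelArgs become pure virtual hooks on the base class, MetadataStreamerV4 loses its final so V5 can derive from it, and V5 overrides only the two hooks whose behavior changes. Shared driver code in V3 can then reach the v5 layout through the vtable without duplication.

  struct StreamerBase {                    // cf. MetadataStreamer
    virtual ~StreamerBase() = default;
  protected:
    virtual void emitVersion() = 0;
    virtual void emitHiddenKernelArgs(unsigned &Offset) = 0;
  };

  struct StreamerV3 : StreamerBase {       // msgpack, code object v3
  protected:
    void emitVersion() override {}                     // v3 version record
    void emitHiddenKernelArgs(unsigned &) override {}  // v3/v4 layout
  };

  struct StreamerV4 : StreamerV3 {         // no longer 'final'
  protected:
    void emitVersion() override {}                     // v4 version record
  };

  struct StreamerV5 final : StreamerV4 {
  protected:
    void emitVersion() override {}                     // v5 version record
    void emitHiddenKernelArgs(unsigned &) override {}  // v5 layout above
  };

  int main() { StreamerV5 S; (void)S; return 0; }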
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 04c6f67ed339..645d05aa9238 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4778,6 +4778,7 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return legalizeTrapHsaQueuePtr(MI, MRI, B);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return ST.supportsGetDoorbellID() ?
legalizeTrapHsa(MI, MRI, B) :
legalizeTrapHsaQueuePtr(MI, MRI, B);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index c28427758ac7..bbbadfdfd444 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -16,8 +16,9 @@
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 2d8126a49327..99b7ffb33884 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -13,15 +13,16 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-#include "Utils/AMDGPUBaseInfo.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c1c88d9a7462..ffe626513d47 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1129,7 +1129,8 @@ class KernelScopeInfo {
if (i >= SgprIndexUnusedMin) {
SgprIndexUnusedMin = ++i;
if (Ctx) {
- MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+ MCSymbol* const Sym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
}
}
@@ -1139,7 +1140,8 @@ class KernelScopeInfo {
if (i >= VgprIndexUnusedMin) {
VgprIndexUnusedMin = ++i;
if (Ctx) {
- MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ MCSymbol* const Sym =
+ Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
}
}
@@ -1296,7 +1298,7 @@ public:
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
@@ -1313,7 +1315,7 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
- if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
+ if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
initializeGprCountSymbol(IS_VGPR);
initializeGprCountSymbol(IS_SGPR);
} else
@@ -2747,7 +2749,7 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
return nullptr;
}
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
return nullptr;
} else
@@ -5099,7 +5101,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveBegin;
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
- isHsaAbiVersion3Or4(&getSTI())
+ isHsaAbiVersion3AndAbove(&getSTI())
? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
HSAMD::V3::AssemblerDirectiveEnd)
: std::make_tuple(HSAMD::AssemblerDirectiveBegin,
@@ -5116,7 +5118,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
HSAMetadataString))
return true;
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
return Error(getLoc(), "invalid HSA metadata");
} else {
@@ -5266,7 +5268,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if (isHsaAbiVersion3Or4(&getSTI())) {
+ if (isHsaAbiVersion3AndAbove(&getSTI())) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
@@ -7440,7 +7442,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
if (!getTargetStreamer().getTargetID())
getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
- if (isHsaAbiVersion3Or4(&getSTI()))
+ if (isHsaAbiVersion3AndAbove(&getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9578bdb0bad0..7aa5f1abf65b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -396,6 +396,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
if (getTargetID()->isXnackSupported())
OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
break;
@@ -578,6 +579,7 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return getEFlagsV3();
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return getEFlagsV4();
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 561866b5a398..e2f4a0896bc3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5423,6 +5423,7 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return lowerTrapHsaQueuePtr(Op, DAG);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return Subtarget->supportsGetDoorbellID() ?
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index c18637bdbc43..44bdbe37dec0 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -938,12 +938,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
// 2. It is safe to move MBBI down past the instruction that I will
// be merged into.
- if (MBBI->hasUnmodeledSideEffects()) {
- // We can't re-order this instruction with respect to other memory
- // operations, so we fail both conditions mentioned above.
- return false;
- }
-
if (MBBI->mayLoadOrStore() &&
(!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
!canMoveInstsAcrossMemOp(*MBBI, InstsToMove, AA))) {
@@ -1977,10 +1971,10 @@ SILoadStoreOptimizer::collectMergeableInsts(
if (promoteConstantOffsetToImm(MI, Visited, AnchorList))
Modified = true;
- // Don't combine if volatile. We also won't be able to merge across this, so
- // break the search. We can look after this barrier for separate merges.
- if (MI.hasOrderedMemoryRef()) {
- LLVM_DEBUG(dbgs() << "Breaking search on memory fence: " << MI);
+    // Treat volatile accesses, ordered accesses and unmodeled side effects as
+    // barriers; the search can resume past such a barrier for separate merges.
+ if (MI.hasOrderedMemoryRef() || MI.hasUnmodeledSideEffects()) {
+ LLVM_DEBUG(dbgs() << "Breaking search on barrier: " << MI);
// Search will resume after this instruction in a separate merge list.
++BlockI;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1e96266eb06c..683be871ff82 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -99,6 +99,8 @@ Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
+ case 5:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
default:
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
Twine(AmdhsaCodeObjectVersion));
@@ -123,8 +125,15 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
return false;
}
-bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
- return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
+bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
+ if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
+ return false;
+}
+
+bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
+ return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
+ isHsaAbiVersion5(STI);
}
#define GET_MIMGBaseOpcodesTable_IMPL
@@ -495,6 +504,7 @@ std::string AMDGPUTargetID::toString() const {
Features += "+sram-ecc";
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 89f928eb8b92..4516b511f3c8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -47,9 +47,12 @@ bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 5,
+/// false otherwise.
+bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
-bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
+bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
struct GcnBufferFormatInfo {
unsigned Format;
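
A small self-contained model of what the rename buys: every call site that previously spelled out "3 or 4" now expresses "3 and above", so adding v5 (and any later version) is a change to the predicate, not to its callers. The function below is a stand-in for illustration, not the LLVM API; the msgpack-vs-YAML split matches the streamer classes above.

  #include <cstdio>

  // Stand-in for isHsaAbiVersion3AndAbove: v3+ share the msgpack
  // metadata/directive path, v2 keeps the YAML path.
  static bool isHsaAbiVersion3AndAbove(int CodeObjectVersion) {
    return CodeObjectVersion >= 3;
  }

  int main() {
    for (int V = 2; V <= 5; ++V)
      std::printf("code object v%d -> %s\n", V,
                  isHsaAbiVersion3AndAbove(V) ? "msgpack" : "YAML");
    return 0;
  }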
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 4efbdbb2abc8..27edf69b4abf 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -656,6 +656,8 @@ def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily",
"CortexA710", "Cortex-A710 ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
+def ProcX1C : SubtargetFeature<"cortex-x1c", "ARMProcFamily", "CortexX1C",
+ "Cortex-X1C ARM processors", []>;
def ProcV1 : SubtargetFeature<"neoverse-v1", "ARMProcFamily",
"NeoverseV1", "Neoverse-V1 ARM processors", []>;
@@ -1443,6 +1445,14 @@ def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"cortex-x1c", [ARMv82a, ProcX1C,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
def : ProcNoItin<"neoverse-v1", [ARMv84a,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index cde715880376..5b0bae4d9274 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -752,23 +752,17 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.getSize())
- return MCID.getSize();
switch (MI.getOpcode()) {
default:
- // pseudo-instruction sizes are zero.
- return 0;
+    // Return the size specified in the .td file. If there is none, return 0:
+    // we can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
+    // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
+    // contrast to AArch64, whose instructions have a default size of 4 bytes.
+ return MCID.getSize();
case TargetOpcode::BUNDLE:
return getInstBundleLength(MI);
- case ARM::MOVi16_ga_pcrel:
- case ARM::MOVTi16_ga_pcrel:
- case ARM::t2MOVi16_ga_pcrel:
- case ARM::t2MOVTi16_ga_pcrel:
- return 4;
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm:
- return 8;
case ARM::CONSTPOOL_ENTRY:
case ARM::JUMPTABLE_INSTS:
case ARM::JUMPTABLE_ADDRS:
@@ -777,19 +771,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI.getOperand(2).getImm();
- case ARM::Int_eh_sjlj_longjmp:
- return 16;
- case ARM::tInt_eh_sjlj_longjmp:
- return 10;
- case ARM::tInt_WIN_eh_sjlj_longjmp:
- return 12;
- case ARM::Int_eh_sjlj_setjmp:
- case ARM::Int_eh_sjlj_setjmp_nofp:
- return 20;
- case ARM::tInt_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 12;
case ARM::SPACE:
return MI.getOperand(1).getImm();
case ARM::INLINEASM:
@@ -800,14 +781,6 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
Size = alignTo(Size, 4);
return Size;
}
- case ARM::SpeculationBarrierISBDSBEndBB:
- case ARM::t2SpeculationBarrierISBDSBEndBB:
- // This gets lowered to 2 4-byte instructions.
- return 8;
- case ARM::SpeculationBarrierSBEndBB:
- case ARM::t2SpeculationBarrierSBEndBB:
- // This gets lowered to 1 4-byte instructions.
- return 4;
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fe4e6b24367a..1b41427a1cab 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14527,7 +14527,7 @@ static SDValue PerformXORCombine(SDNode *N,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
const TargetLowering *TLI = Subtarget->getTargetLowering();
- if (TLI->isConstTrueVal(N1.getNode()) &&
+ if (TLI->isConstTrueVal(N1) &&
(N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
if (CanInvertMVEVCMP(N0)) {
SDLoc DL(N0);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 1c1db473f866..32a3911d3369 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -3657,6 +3657,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
(MOVi16 GPR:$Rd, imm0_65535_expr:$imm, pred:$p), 0>,
Requires<[IsARM, HasV6T2]>;
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -3680,6 +3682,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
let DecoderMethod = "DecodeArmMOVTWInstruction";
}
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
(ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -5895,27 +5899,30 @@ def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>,
//
// These are pseudo-instructions and are lowered to individual MC-insts, so
// no encoding information is necessary.
+// This gets lowered to an instruction sequence of 20 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
- hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in {
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
+// This gets lowered to an instruction sequence of 20 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1, Size = 20 in {
def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
+// This gets lowered to an instruction sequence of 16 bytes
// FIXME: Non-IOS version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Size = 16,
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
@@ -5958,7 +5965,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, Size = 8 in
def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
@@ -6419,8 +6426,12 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions
+ let Size = 8 in
def SpeculationBarrierISBDSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  // This gets lowered to a single 4-byte instruction
+ let Size = 4 in
def SpeculationBarrierSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index f09ad8167600..71527ae1ab11 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1537,25 +1537,28 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// Defs. By doing so, we also cause the prologue/epilogue code to actively
// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
+// This gets lowered to an instruction sequence of 12 bytes
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
+// This gets lowered to an instruction sequence of 10 bytes
// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
- Defs = [ R7, LR, SP ] in
+ Size = 10, Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins tGPR:$src, tGPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp tGPR:$src, tGPR:$scratch)]>,
Requires<[IsThumb,IsNotWindows]>;
+// This gets lowered to an instruction sequence of 12 bytes
// (Windows is Thumb2-only)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
- Defs = [ R11, LR, SP ] in
+ Size = 12, Defs = [ R11, LR, SP ] in
def tInt_WIN_eh_sjlj_longjmp
: XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 6e8e61ca2b8e..f80b9a5053f7 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2194,6 +2194,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>;
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>;
@@ -2223,6 +2225,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd),
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
+// This gets lowered to a single 4-byte instruction
+let Size = 4 in
def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
Sched<[WriteALU]>, Requires<[IsThumb, HasV8MBaseline]>;
@@ -3814,10 +3818,11 @@ def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
+// This gets lowered to an instruction sequence of 12 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
@@ -3825,9 +3830,10 @@ let Defs =
Requires<[IsThumb2, HasVFP2]>;
}
+// This gets lowered to an instruction sequence of 12 bytes
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Size = 12,
usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
@@ -4224,7 +4230,7 @@ def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
+let isReMaterializable = 1, isMoveImm = 1, Size = 8 in
def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
[(set rGPR:$dst, (i32 imm:$src))]>,
Requires<[IsThumb, UseMovt]>;
@@ -5006,8 +5012,12 @@ def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>;
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
+ // This gets lowered to a pair of 4-byte instructions
+ let Size = 8 in
def t2SpeculationBarrierISBDSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
+  // This gets lowered to a single 4-byte instruction
+ let Size = 4 in
def t2SpeculationBarrierSBEndBB
: PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 2dd25234dc50..32160b109343 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -304,6 +304,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM7:
case CortexR52:
case CortexX1:
+ case CortexX1C:
break;
case Exynos:
LdStMultipleTiming = SingleIssuePlusExtras;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 1c2b7ee6ba35..7cbdc014299f 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -77,6 +77,7 @@ protected:
CortexR52,
CortexR7,
CortexX1,
+ CortexX1C,
Exynos,
Krait,
Kryo,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index e0750a9945d2..d9d563ead260 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2109,9 +2109,6 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
}
Type *T = I.getType();
- if (T->isPointerTy())
- T = T->getPointerElementType();
-
if (T->getScalarSizeInBits() > 32) {
LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index ea6a7498e27f..311e43d77210 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -313,12 +313,18 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
return false;
}
+ // If the register is undefined (for example if it's a reserved register),
+ // it may still be possible to extend the range, but it's safer to be
+ // conservative and just punt.
+ if (LRExtRegRD == 0)
+ return false;
+
MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode();
NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD);
// Reaching Def to LRExtReg can't be a phi.
if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
MI->getParent() != UseMI->getParent())
- return false;
+ return false;
}
return true;
}
diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
index 860c0ce29326..79e9ad4dd1d2 100644
--- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
@@ -21,13 +21,32 @@ using namespace llvm;
M68kLegalizerInfo::M68kLegalizerInfo(const M68kSubtarget &ST) {
using namespace TargetOpcode;
- const LLT S32 = LLT::scalar(32);
- const LLT P0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_LOAD).legalFor({S32});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({P0});
- getActionDefinitionsBuilder(G_ADD).legalFor({S32});
- getActionDefinitionsBuilder(G_SUB).legalFor({S32});
- getActionDefinitionsBuilder(G_MUL).legalFor({S32});
- getActionDefinitionsBuilder(G_UDIV).legalFor({S32});
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT p0 = LLT::pointer(0, 32);
+
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UDIV, G_AND})
+ .legalFor({s8, s16, s32})
+ .clampScalar(0, s8, s32)
+ .widenScalarToNextPow2(0, 8);
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
+
+ getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});
+
+ getActionDefinitionsBuilder({G_STORE, G_LOAD})
+ .legalForTypesWithMemDesc({{s32, p0, s32, 4},
+ {s32, p0, s16, 4},
+ {s32, p0, s8, 4},
+ {s16, p0, s16, 2},
+ {s8, p0, s8, 1},
+ {p0, p0, s32, 4}})
+ .clampScalar(0, s8, s32);
+
+ getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s32}});
+
getLegacyLegalizerInfo().computeTables();
}
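
The clamp-and-widen rules above read as plain arithmetic on the scalar bit width. A sketch of the net effect on type index 0, assuming the usual GlobalISel semantics of clampScalar and widenScalarToNextPow2 (and ignoring that clampScalar narrows over-wide values by splitting rather than truncation):

  #include <cassert>

  // s1..s8 -> s8, s9..s16 -> s16, s17..s32 -> s32, wider -> s32 pieces.
  static unsigned legalizedBits(unsigned Bits) {
    if (Bits < 8)  Bits = 8;            // clampScalar(0, s8, s32), low bound
    if (Bits > 32) Bits = 32;           // clampScalar(0, s8, s32), high bound
    unsigned W = 8;                     // widenScalarToNextPow2(0, /*Min=*/8)
    while (W < Bits) W *= 2;            // next power of two >= Bits
    return W;
  }

  int main() {
    assert(legalizedBits(1) == 8);      // e.g. a G_ADD of s1
    assert(legalizedBits(12) == 16);
    assert(legalizedBits(24) == 32);
    assert(legalizedBits(64) == 32);    // split into s32 operations
    return 0;
  }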
diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td
index d610bce5c277..0d1278102378 100644
--- a/llvm/lib/Target/M68k/M68kInstrBits.td
+++ b/llvm/lib/Target/M68k/M68kInstrBits.td
@@ -79,6 +79,10 @@ def BTST32di : MxBTST_RI<MxType32d>;
// Memory BTST limited to 8 bits only
def BTST8jd : MxBTST_MR<MxType8d, MxType8.JOp, MxType8.JPat,
MxEncEAj_0, MxExtEmpty>;
+def BTST8od : MxBTST_MR<MxType8d, MxType8.OOp, MxType8.OPat,
+ MxEncEAo_0, MxExtEmpty>;
+def BTST8ed : MxBTST_MR<MxType8d, MxType8.EOp, MxType8.EPat,
+ MxEncEAe_0, MxExtEmpty>;
def BTST8pd : MxBTST_MR<MxType8d, MxType8.POp, MxType8.PPat,
MxEncEAp_0, MxExtI16_0>;
def BTST8fd : MxBTST_MR<MxType8d, MxType8.FOp, MxType8.FPat,
@@ -90,6 +94,10 @@ def BTST8kd : MxBTST_MR<MxType8d, MxType8.KOp, MxType8.KPat,
def BTST8ji : MxBTST_MI<MxType8d, MxType8.JOp, MxType8.JPat,
MxEncEAj_0, MxExtEmpty>;
+def BTST8oi : MxBTST_MI<MxType8d, MxType8.OOp, MxType8.OPat,
+ MxEncEAo_0, MxExtEmpty>;
+def BTST8ei : MxBTST_MI<MxType8d, MxType8.EOp, MxType8.EPat,
+ MxEncEAe_0, MxExtEmpty>;
def BTST8pi : MxBTST_MI<MxType8d, MxType8.POp, MxType8.PPat,
MxEncEAp_0, MxExtI16_0>;
def BTST8fi : MxBTST_MI<MxType8d, MxType8.FOp, MxType8.FPat,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index eac237bb27bb..7b5248906b56 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -574,7 +574,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
- setOperationAction(Op, MVT::f64, GetMinMaxAction(Expand));
setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 22e200e77831..22084cddc092 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -896,6 +896,7 @@ defm FMUL : F3_fma_component<"mul", fmul>;
defm FMIN : F3<"min", fminnum>;
defm FMAX : F3<"max", fmaxnum>;
+// Note: min.NaN.f64 and max.NaN.f64 do not actually exist.
defm FMINNAN : F3<"min.NaN", fminimum>;
defm FMAXNAN : F3<"max.NaN", fmaximum>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 25cc34badda0..cbeae0ab03b8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1252,7 +1252,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
@@ -9093,22 +9092,30 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
unsigned &Opcode) {
- const SDNode *InputNode = Op.getOperand(0).getNode();
- if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode))
- return false;
-
- if (!Subtarget.hasVSX())
+ LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
+ if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
return false;
EVT Ty = Op->getValueType(0);
- if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 ||
- Ty == MVT::v8i16 || Ty == MVT::v16i8)
+ // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
+ // as we cannot handle extending loads for these types.
+ if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
+ ISD::isNON_EXTLoad(InputNode))
+ return true;
+
+ EVT MemVT = InputNode->getMemoryVT();
+ // For v8i16 and v16i8 types, extending loads can be handled as long as the
+ // memory VT is the same vector element VT type.
+ // The loads feeding into the v8i16 and v16i8 types will be extending because
+ // scalar i8/i16 are not legal types.
+ if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
+ (MemVT == Ty.getVectorElementType()))
return true;
if (Ty == MVT::v2i64) {
// Check the extend type, when the input type is i32, and the output vector
// type is v2i64.
- if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
+ if (MemVT == MVT::i32) {
if (ISD::isZEXTLoad(InputNode))
Opcode = PPCISD::ZEXT_LD_SPLAT;
if (ISD::isSEXTLoad(InputNode))
@@ -10755,6 +10762,26 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (VT == MVT::v2f64 && C)
return Op;
+ if (Subtarget.hasP9Vector()) {
+ // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
+ // because on P10, it allows this specific insert_vector_elt load pattern to
+ // utilize the refactored load and store infrastructure in order to exploit
+ // prefixed loads.
+ // On targets with inexpensive direct moves (Power9 and up), a
+ // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
+ // load since a single precision load will involve conversion to double
+ // precision on the load followed by another conversion to single precision.
+ if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
+ (isa<LoadSDNode>(V2))) {
+ SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
+ SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
+ SDValue InsVecElt =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
+ BitcastLoad, Op.getOperand(2));
+ return DAG.getBitcast(MVT::v4f32, InsVecElt);
+ }
+ }
+
if (Subtarget.isISA3_1()) {
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index fe354208533b..ff43426dd1ef 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2816,32 +2816,20 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
let AddedComplexity = 400 in {
// Immediate vector insert element
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
(VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
- (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
}
foreach i = [0, 1] in
def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
@@ -2860,12 +2848,6 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
(VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
- (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
- (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
- def: Pat<(v4f32(insertelt v4f32 : $vDi, (f32(load xaddr : $rA)), i32 : $rB)),
- (VINSWLX v4f32 : $vDi, InsertEltShift.Left2, (LWZX memrr : $rA))>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
@@ -2881,20 +2863,14 @@ let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
(VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load DSForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load PDForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load XForm:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
}
@@ -2904,15 +2880,6 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
foreach Idx = [0, 1, 2, 3] in {
def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
(VINSW $vDi, !mul(Idx, 4), $rA)>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- (Ty Idx))),
- (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
}
}
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index a2ea34fe11c7..01f36e6dcdd2 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2266,8 +2266,8 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
- } else if (Inst.Opc == RISCV::ADDUW) {
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDUW)
+ } else if (Inst.Opc == RISCV::ADD_UW) {
+ emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW)
.addReg(DestReg)
.addReg(SrcReg)
.addReg(RISCV::X0));
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 14d0191a505f..1078403a3fd2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -197,9 +197,9 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Get byte count of instruction.
unsigned Size = Desc.getSize();
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
if (MI.getOpcode() == RISCV::PseudoCALLReg ||
MI.getOpcode() == RISCV::PseudoCALL ||
MI.getOpcode() == RISCV::PseudoTAIL ||
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 18858209aa9b..e935179e5f9b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -31,7 +31,7 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
case RISCV::LUI:
Compressed = isInt<6>(Instr.Imm);
break;
- case RISCV::ADDUW:
+ case RISCV::ADD_UW:
Compressed = false;
break;
}
@@ -123,10 +123,11 @@ static void generateInstSeqImpl(int64_t Val,
}
}
- // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
+ // Try to use SLLI_UW for Hi52 when it is uint32 but not int32.
if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
ActiveFeatures[RISCV::FeatureStdExtZba]) {
- // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
+ // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
+ // SLLI_UW.
Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
Unsigned = true;
}
@@ -134,7 +135,7 @@ static void generateInstSeqImpl(int64_t Val,
generateInstSeqImpl(Hi52, ActiveFeatures, Res);
if (Unsigned)
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
else
Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
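
The uint32-but-not-int32 trick above is easy to check with plain C++. Take Hi52 = 0x80001234: LUI+ADDI on RV64 sign-extend, so the closest value they can materialize is the ones-extended 0xffffffff80001234, which is exactly what the Hi52 | (0xffffffff << 32) adjustment requests; slli.uw then zero-extends the low 32 bits before shifting, discarding those ones again. A sketch of the instruction semantics, not the LLVM code:

  #include <cassert>
  #include <cstdint>

  // slli.uw rd, rs1, shamt: zero-extend rs1's low 32 bits, then shift left.
  static uint64_t slli_uw(uint64_t rs1, unsigned shamt) {
    return (rs1 & 0xffffffffull) << shamt;
  }

  int main() {
    uint64_t Hi52 = 0x80001234;                          // uint32, not int32
    uint64_t FromLuiAddi = Hi52 | (0xffffffffull << 32); // what LUI+ADDI build
    assert(slli_uw(FromLuiAddi, 12) == (Hi52 << 12));    // upper ones vanish
    return 0;
  }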
@@ -210,7 +211,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
TmpSeq.clear();
generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
- TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0));
// Keep the new sequence if it is an improvement.
if (TmpSeq.size() < Res.size()) {
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 5b0f27c5e937..e32a8fb010de 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -52,11 +52,17 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
def FeatureStdExtZfh
: SubtargetFeature<"zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
- [FeatureStdExtZfhmin, FeatureStdExtF]>;
+ [FeatureStdExtF]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
AssemblerPredicate<(all_of FeatureStdExtZfh),
"'Zfh' (Half-Precision Floating-Point)">;
+def HasStdExtZfhOrZfhmin
+ : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZfhmin()">,
+ AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin),
+ "'Zfh' (Half-Precision Floating-Point) or "
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)">;
+
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 26ce16486bd9..40ee7ca6bc1c 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -86,9 +86,9 @@ bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
switch (MBBI->getOpcode()) {
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 80340ee81509..0c5c13db7112 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -92,9 +92,9 @@ bool RISCVExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- // RISCVInstrInfo::getInstSizeInBytes hard-codes the number of expanded
- // instructions for each pseudo, and must be updated when adding new pseudos
- // or changing existing ones.
+  // RISCVInstrInfo::getInstSizeInBytes expects the Size field of each
+  // pseudo's tablegen definition to match the total size of the instructions
+  // the pseudo expands to.
switch (MBBI->getOpcode()) {
case RISCV::PseudoLLA:
return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5870502d74d5..6f77428ae721 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -166,8 +166,8 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
if (Inst.Opc == RISCV::LUI)
Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
- else if (Inst.Opc == RISCV::ADDUW)
- Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
+ else if (Inst.Opc == RISCV::ADD_UW)
+ Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
CurDAG->getRegister(RISCV::X0, XLenVT));
else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
Inst.Opc == RISCV::SH3ADD)
@@ -775,10 +775,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
// Use slli.uw when possible.
if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
- SDNode *SLLIUW =
- CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
+ SDNode *SLLI_UW =
+ CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
CurDAG->getTargetConstant(C2, DL, XLenVT));
- ReplaceNode(Node, SLLIUW);
+ ReplaceNode(Node, SLLI_UW);
return;
}
@@ -1811,7 +1811,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
case RISCV::CLZW:
case RISCV::CTZW:
case RISCV::CPOPW:
- case RISCV::SLLIUW:
+ case RISCV::SLLI_UW:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_WU:
case RISCV::FCVT_S_W:
@@ -1830,20 +1830,20 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
return false;
break;
- case RISCV::SEXTB:
+ case RISCV::SEXT_B:
if (Bits < 8)
return false;
break;
- case RISCV::SEXTH:
- case RISCV::ZEXTH_RV32:
- case RISCV::ZEXTH_RV64:
+ case RISCV::SEXT_H:
+ case RISCV::ZEXT_H_RV32:
+ case RISCV::ZEXT_H_RV64:
if (Bits < 16)
return false;
break;
- case RISCV::ADDUW:
- case RISCV::SH1ADDUW:
- case RISCV::SH2ADDUW:
- case RISCV::SH3ADDUW:
+ case RISCV::ADD_UW:
+ case RISCV::SH1ADD_UW:
+ case RISCV::SH2ADD_UW:
+ case RISCV::SH3ADD_UW:
// The first operand to add.uw/shXadd.uw is implicitly zero extended from
// 32 bits.
if (UI.getOperandNo() != 0 || Bits < 32)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5cc3aa35d4d2..97d24c8e9c0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -282,6 +282,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
? Legal
: Expand);
+ // Zbkb can use rev8+brev8 to implement bitreverse.
+ setOperationAction(ISD::BITREVERSE, XLenVT,
+ Subtarget.hasStdExtZbkb() ? Custom : Expand);
}
if (Subtarget.hasStdExtZbb()) {
@@ -1082,6 +1085,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::STORE);
}
+
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -1115,17 +1121,15 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
- case Intrinsic::riscv_masked_cmpxchg_i32: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ case Intrinsic::riscv_masked_cmpxchg_i32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
+ Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
- }
case Intrinsic::riscv_masked_strided_load:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(1);
@@ -2952,17 +2956,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
- // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
- assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
- // Start with the maximum immediate value which is the bitwidth - 1.
- unsigned Imm = VT.getSizeInBits() - 1;
- // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
- if (Op.getOpcode() == ISD::BSWAP)
- Imm &= ~0x7U;
- return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
- DAG.getConstant(Imm, DL, VT));
+ if (Subtarget.hasStdExtZbp()) {
+      // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
+ // Start with the maximum immediate value which is the bitwidth - 1.
+ unsigned Imm = VT.getSizeInBits() - 1;
+ // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
+ if (Op.getOpcode() == ISD::BSWAP)
+ Imm &= ~0x7U;
+ return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Imm, DL, VT));
+ }
+ assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
+ assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
+ // Expand bitreverse to a bswap(rev8) followed by brev8.
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
+ // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
+ // as brev8 by an isel pattern.
+ return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
+ DAG.getConstant(7, DL, VT));
}
case ISD::FSHL:
case ISD::FSHR: {
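
The Zbkb expansion above leans on an identity worth spelling out: byte-reversing a value (rev8, i.e. bswap) and then bit-reversing each byte (brev8, the grevi-7 encoding) reverses all of its bits. A standalone check in plain C++ (hand-rolled instruction models plus the GCC/Clang __builtin_bswap32; none of this is LLVM code):

  #include <cassert>
  #include <cstdint>

  static uint8_t brevByte(uint8_t b) {          // reverse bits within a byte
    b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
    b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
    return (b & 0xAA) >> 1 | (b & 0x55) << 1;
  }

  static uint32_t brev8(uint32_t x) {           // model of Zbkb brev8
    uint32_t r = 0;
    for (int i = 0; i < 4; ++i)
      r |= uint32_t(brevByte(uint8_t(x >> (8 * i)))) << (8 * i);
    return r;
  }

  static uint32_t bitreverse(uint32_t x) {      // reference: reverse all bits
    uint32_t r = 0;
    for (int i = 0; i < 32; ++i)
      r |= ((x >> i) & 1u) << (31 - i);
    return r;
  }

  int main() {
    const uint32_t Tests[] = {0x12345678u, 0u, 0xdeadbeefu};
    for (uint32_t x : Tests)
      assert(brev8(__builtin_bswap32(x)) == bitreverse(x));
    return 0;
  }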
@@ -3063,6 +3076,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
+ if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
+ report_fatal_error("Support for VLEN==32 is incomplete.");
if (isa<ConstantSDNode>(Op.getOperand(0))) {
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
@@ -4288,8 +4303,47 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
if (VecVT.getVectorElementType() == MVT::i1) {
- // FIXME: For now we just promote to an i8 vector and extract from that,
- // but this is probably not optimal.
+ if (VecVT.isFixedLengthVector()) {
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (NumElts >= 8) {
+ MVT WideEltVT;
+ unsigned WidenVecLen;
+ SDValue ExtractElementIdx;
+ SDValue ExtractBitIdx;
+ unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
+ MVT LargestEltVT = MVT::getIntegerVT(
+ std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
+ if (NumElts <= LargestEltVT.getSizeInBits()) {
+ assert(isPowerOf2_32(NumElts) &&
+ "the number of elements should be power of 2");
+ WideEltVT = MVT::getIntegerVT(NumElts);
+ WidenVecLen = 1;
+ ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
+ ExtractBitIdx = Idx;
+ } else {
+ WideEltVT = LargestEltVT;
+ WidenVecLen = NumElts / WideEltVT.getSizeInBits();
+ // extract element index = index / element width
+ ExtractElementIdx = DAG.getNode(
+ ISD::SRL, DL, XLenVT, Idx,
+ DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
+ // mask bit index = index % element width
+ ExtractBitIdx = DAG.getNode(
+ ISD::AND, DL, XLenVT, Idx,
+ DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
+ }
+ MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
+ Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
+ SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
+ Vec, ExtractElementIdx);
+ // Extract the bit from GPR.
+ SDValue ShiftRight =
+ DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
+ return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+ DAG.getConstant(1, DL, XLenVT));
+ }
+ }
+ // Otherwise, promote to an i8 vector and extract from that.
MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
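
A numeric instance of the index arithmetic in the i1 lowering above, with WideEltVT = i8 chosen for illustration: extracting bit 13 of a fixed v32i1 mask bitcast to v4i8 reads lane 13 / 8 = 1 and then tests bit 13 % 8 = 5 of it, and both operations reduce to the SRL and AND the code emits.

  #include <cassert>

  int main() {
    unsigned EltBits = 8;                   // WideEltVT = i8
    unsigned Idx = 13;                      // mask bit to extract
    unsigned ElementIdx = Idx >> 3;         // Idx / EltBits: SRL by Log2(8)
    unsigned BitIdx = Idx & (EltBits - 1);  // Idx % EltBits: AND with 7
    assert(ElementIdx == 1 && BitIdx == 5);
    // The extracted value is (Lane >> BitIdx) & 1, the SRL+AND pair above.
    return 0;
  }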
@@ -4411,15 +4465,30 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(RISCV::X4, PtrVT);
}
case Intrinsic::riscv_orc_b:
- // Lower to the GORCI encoding for orc.b.
- return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
+ case Intrinsic::riscv_brev8: {
+ // Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
DAG.getConstant(7, DL, XLenVT));
+ }
case Intrinsic::riscv_grev:
case Intrinsic::riscv_gorc: {
unsigned Opc =
IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::riscv_zip:
+ case Intrinsic::riscv_unzip: {
+ // Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
+    // For i32 the immediate is 15. For i64 the immediate is 31.
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
+ unsigned BitWidth = Op.getValueSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
+ DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
+ }
case Intrinsic::riscv_shfl:
case Intrinsic::riscv_unshfl: {
unsigned Opc =
@@ -5829,14 +5898,17 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
}
}
- if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
- }
-
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
+ VL);
+ Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
+ TrueMask, VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
@@ -5937,14 +6009,17 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
}
}
- if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
- IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
- }
-
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
+ VL);
+ Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
+ TrueMask, VL);
+ }
+
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
@@ -6568,7 +6643,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
unsigned Opc =
IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
- if (isa<ConstantSDNode>(N->getOperand(2))) {
+ // There is no (UN)SHFLIW. If the control word is a constant, we can use
+ // (UN)SHFLI with bit 4 of the control word cleared. The upper 32 bit half
+ // will be shuffled the same way as the lower 32 bit half, but the two
+ // halves won't cross.
+ if (isa<ConstantSDNode>(NewOp2)) {
NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
DAG.getConstant(0xf, DL, MVT::i64));
Opc =
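Why masking the control word with 0xf is safe: with bit 4 clear, no shuffle stage crosses the 32-bit boundary, so both halves are permuted identically and independently. A reference-style model of RV64 shfl, adapted from the draft Bitmanip pseudocode (an assumption, not part of this patch):

    #include <cassert>
    #include <cstdint>

    static uint64_t stage(uint64_t Src, uint64_t MaskL, uint64_t MaskR, int N) {
      uint64_t X = Src & ~(MaskL | MaskR);
      return X | ((Src << N) & MaskL) | ((Src >> N) & MaskR);
    }

    // Stage 16 is the only one whose masks span the 32-bit halves.
    static uint64_t shfl64(uint64_t X, unsigned Ctrl) {
      if (Ctrl & 16) X = stage(X, 0x0000ffff00000000, 0x00000000ffff0000, 16);
      if (Ctrl & 8)  X = stage(X, 0x00ff000000ff0000, 0x0000ff000000ff00, 8);
      if (Ctrl & 4)  X = stage(X, 0x0f000f000f000f00, 0x00f000f000f000f0, 4);
      if (Ctrl & 2)  X = stage(X, 0x3030303030303030, 0x0c0c0c0c0c0c0c0c, 2);
      if (Ctrl & 1)  X = stage(X, 0x4444444444444444, 0x2222222222222222, 1);
      return X;
    }

    int main() {
      uint64_t Hi = 0xdeadbeef00000000, Lo = 0x0123cdef;
      for (unsigned Ctrl = 0; Ctrl < 16; ++Ctrl) // bit 4 clear
        assert((shfl64(Hi | Lo, Ctrl) >> 32) == (shfl64(Hi, Ctrl) >> 32));
      return 0;
    }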
@@ -7284,8 +7363,8 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
return SDValue(N, 0);
}
-// Try to form VWMUL or VWMULU.
-// FIXME: Support VWMULSU.
+// Try to form VWMUL, VWMULU or VWMULSU.
+// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool Commute) {
assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
@@ -7296,6 +7375,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
+ bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
return SDValue();
@@ -7316,7 +7396,7 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
// See if the other operand is the same opcode.
- if (Op0.getOpcode() == Op1.getOpcode()) {
+ if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
if (!Op1.hasOneUse())
return SDValue();
@@ -7366,7 +7446,9 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
if (Op1.getValueType() != NarrowVT)
Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
- unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ unsigned WMulOpc = RISCVISD::VWMULSU_VL;
+ if (!IsVWMULSU)
+ WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
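A per-element scalar model of what the new VWMULSU_VL computes once the combine fires, assuming SEW=16 widening to 32 bits (names are illustrative):

    #include <cassert>
    #include <cstdint>

    // vwmulsu: sign-extend one operand, zero-extend the other, multiply wide.
    static int32_t vwmulsuElt(int16_t A, uint16_t B) {
      return static_cast<int32_t>(A) * static_cast<int32_t>(B);
    }

    int main() {
      // Treating both operands as signed or both as unsigned would give a
      // different answer; the mixed extension yields -2 * 65535 = -131070.
      assert(vwmulsuElt(-2, 65535) == -131070);
      return 0;
    }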
@@ -8194,12 +8276,17 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
- case RISCVISD::READ_VLENB:
- // We assume VLENB is at least 16 bytes.
- Known.Zero.setLowBits(4);
+ case RISCVISD::READ_VLENB: {
+ // If we know the minimum VLen from Zvl extensions, we can use that to
+ // determine the trailing zeros of VLENB.
+ // FIXME: Limit to 128 bit vectors until we have more testing.
+ unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
+ if (MinVLenB > 0)
+ Known.Zero.setLowBits(Log2_32(MinVLenB));
// We assume VLENB is no more than 65536 / 8 bytes.
Known.Zero.setBitsFrom(14);
break;
+ }
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo =
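A worked instance of the known-bits reasoning above, taking Zvl128b as the assumed minimum: VLENB = VLEN/8 is then a multiple of 16, fixing four trailing zeros, while the 65536-bit architectural ceiling keeps bits 14 and up clear.

    #include <algorithm>
    #include <cassert>

    static unsigned floorLog2(unsigned V) { // stand-in for llvm::Log2_32
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      unsigned MinVLen = 128;                          // Zvl128b (assumed)
      unsigned MinVLenB = std::min(128u, MinVLen) / 8; // 16, after the FIXME cap
      assert(floorLog2(MinVLenB) == 4);                // low 4 bits of VLENB known 0
      assert(65536 / 8 < (1 << 14));                   // bits >= 14 known 0
      return 0;
    }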
@@ -8230,9 +8317,11 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
default:
break;
case RISCVISD::SELECT_CC: {
- unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
if (Tmp == 1) return 1; // Early out.
- unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
+ unsigned Tmp2 =
+ DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
case RISCVISD::SLLW:
@@ -8275,15 +8364,18 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
}
break;
}
- case RISCVISD::VMV_X_S:
+ case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
// element type of the input vector operand, subtracting its width from the
// XLEN, and then adding one (sign bit within the element type). If the
// element type is wider than XLen, the least-significant XLEN bits are
// taken.
- if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
- return 1;
- return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
+ unsigned XLen = Subtarget.getXLen();
+ unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
+ if (EltBits <= XLen)
+ return XLen - EltBits + 1;
+ break;
+ }
}
return 1;
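A worked instance of the rule above (illustrative helper, not the DAG code): an i16 element moved to a 64-bit GPR arrives sign-extended, so 64 - 16 + 1 = 49 bits are copies of the sign bit.

    #include <cassert>

    static unsigned vmvXSSignBits(unsigned XLen, unsigned EltBits) {
      return EltBits <= XLen ? XLen - EltBits + 1 // sign bit plus extension
                             : 1;                 // wider element: low XLEN bits only
    }

    int main() {
      assert(vmvXSSignBits(64, 16) == 49);
      assert(vmvXSSignBits(32, 64) == 1);
      return 0;
    }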
@@ -10129,6 +10221,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FP_ROUND_VL)
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
+ NODE_NAME_CASE(VWMULSU_VL)
NODE_NAME_CASE(VWADDU_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 58b7ec89f875..840a821870a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ enum NodeType : unsigned {
// Widening instructions
VWMUL_VL,
VWMULU_VL,
+ VWMULSU_VL,
VWADDU_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index d39e0805a79c..649eb57b325b 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -999,6 +999,12 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
+ // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was
+ // calculated in computeIncomingVLVTYPE. We need this to apply
+ // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We
+ // can't include predecessor information in that decision to avoid disagreeing
+ // with the global analysis.
+ VSETVLIInfo BBLocalInfo;
// Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
MachineInstr *PrevVSETVLIMI = nullptr;
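A toy model of the BBLocalInfo discipline described above, under the stated assumption that predecessor state may be consulted only while no instruction in the block has yet established local state:

    #include <cassert>
    #include <vector>

    // -1 means "invalid / not yet set". Returns the VL/VTYPE-like state seen
    // just before instruction `Idx`.
    static int stateBeforeInst(const std::vector<int> &LocalDefs, unsigned Idx,
                               int PredState) {
      int Local = -1;
      for (unsigned I = 0; I < Idx; ++I)
        if (LocalDefs[I] != -1)
          Local = LocalDefs[I]; // block-local knowledge wins once it exists
      return Local != -1 ? Local : PredState;
    }

    int main() {
      std::vector<int> Defs = {-1, 7, -1, 9};
      assert(stateBeforeInst(Defs, 0, /*PredState=*/3) == 3); // use predecessor
      assert(stateBeforeInst(Defs, 2, /*PredState=*/3) == 7); // ignore predecessor
      return 0;
    }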
@@ -1014,6 +1020,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
CurInfo = getInfoForVSETVLI(MI);
+ BBLocalInfo = getInfoForVSETVLI(MI);
PrevVSETVLIMI = &MI;
continue;
}
@@ -1043,12 +1050,22 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
// use the predecessor information.
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
"Expected a valid predecessor state.");
- if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
+ // Don't use predecessor information if there was an earlier instruction
+ // in this block that allowed a vsetvli to be skipped for load/store.
+ if (!(BBLocalInfo.isValid() &&
+ canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
+ needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
needVSETVLIPHI(NewInfo, MBB)) {
insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
CurInfo = NewInfo;
+ BBLocalInfo = NewInfo;
}
+
+ // We must update BBLocalInfo for every vector instruction.
+ if (!BBLocalInfo.isValid())
+ BBLocalInfo = NewInfo;
} else {
+ assert(BBLocalInfo.isValid());
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
// If this is a unit-stride or strided load/store, we may be able to use
@@ -1084,6 +1101,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (NeedInsertVSETVLI)
insertVSETVLI(MBB, MI, NewInfo, CurInfo);
CurInfo = NewInfo;
+ BBLocalInfo = NewInfo;
}
}
PrevVSETVLIMI = nullptr;
@@ -1094,6 +1112,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
CurInfo = VSETVLIInfo::getUnknown();
+ BBLocalInfo = VSETVLIInfo::getUnknown();
PrevVSETVLIMI = nullptr;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7baed2793e4e..55f4a19b79eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -654,8 +654,8 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
- } else if (Inst.Opc == RISCV::ADDUW) {
- BuildMI(MBB, MBBI, DL, get(RISCV::ADDUW), Result)
+ } else if (Inst.Opc == RISCV::ADD_UW) {
+ BuildMI(MBB, MBBI, DL, get(RISCV::ADD_UW), Result)
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
@@ -965,93 +965,29 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ if (MI.isMetaInstruction())
+ return 0;
+
unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- default: {
- if (MI.getParent() && MI.getParent()->getParent()) {
- const auto MF = MI.getMF();
- const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
- const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
- const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
- const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
- if (isCompressibleInst(MI, &ST, MRI, STI))
- return 2;
- }
- return get(Opcode).getSize();
- }
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- // These values are determined based on RISCVExpandAtomicPseudoInsts,
- // RISCVExpandPseudoInsts and RISCVMCCodeEmitter, depending on where the
- // pseudos are expanded.
- case RISCV::PseudoCALLReg:
- case RISCV::PseudoCALL:
- case RISCV::PseudoJump:
- case RISCV::PseudoTAIL:
- case RISCV::PseudoLLA:
- case RISCV::PseudoLA:
- case RISCV::PseudoLA_TLS_IE:
- case RISCV::PseudoLA_TLS_GD:
- return 8;
- case RISCV::PseudoAtomicLoadNand32:
- case RISCV::PseudoAtomicLoadNand64:
- return 20;
- case RISCV::PseudoMaskedAtomicSwap32:
- case RISCV::PseudoMaskedAtomicLoadAdd32:
- case RISCV::PseudoMaskedAtomicLoadSub32:
- return 28;
- case RISCV::PseudoMaskedAtomicLoadNand32:
- return 32;
- case RISCV::PseudoMaskedAtomicLoadMax32:
- case RISCV::PseudoMaskedAtomicLoadMin32:
- return 44;
- case RISCV::PseudoMaskedAtomicLoadUMax32:
- case RISCV::PseudoMaskedAtomicLoadUMin32:
- return 36;
- case RISCV::PseudoCmpXchg32:
- case RISCV::PseudoCmpXchg64:
- return 16;
- case RISCV::PseudoMaskedCmpXchg32:
- return 32;
- case TargetOpcode::INLINEASM:
- case TargetOpcode::INLINEASM_BR: {
+ if (Opcode == TargetOpcode::INLINEASM ||
+ Opcode == TargetOpcode::INLINEASM_BR) {
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
- case RISCV::PseudoVSPILL2_M1:
- case RISCV::PseudoVSPILL2_M2:
- case RISCV::PseudoVSPILL2_M4:
- case RISCV::PseudoVSPILL3_M1:
- case RISCV::PseudoVSPILL3_M2:
- case RISCV::PseudoVSPILL4_M1:
- case RISCV::PseudoVSPILL4_M2:
- case RISCV::PseudoVSPILL5_M1:
- case RISCV::PseudoVSPILL6_M1:
- case RISCV::PseudoVSPILL7_M1:
- case RISCV::PseudoVSPILL8_M1:
- case RISCV::PseudoVRELOAD2_M1:
- case RISCV::PseudoVRELOAD2_M2:
- case RISCV::PseudoVRELOAD2_M4:
- case RISCV::PseudoVRELOAD3_M1:
- case RISCV::PseudoVRELOAD3_M2:
- case RISCV::PseudoVRELOAD4_M1:
- case RISCV::PseudoVRELOAD4_M2:
- case RISCV::PseudoVRELOAD5_M1:
- case RISCV::PseudoVRELOAD6_M1:
- case RISCV::PseudoVRELOAD7_M1:
- case RISCV::PseudoVRELOAD8_M1: {
- // The values are determined based on expandVSPILL and expandVRELOAD that
- // expand the pseudos depending on NF.
- unsigned NF = isRVVSpillForZvlsseg(Opcode)->first;
- return 4 * (2 * NF - 1);
- }
+
+ if (MI.getParent() && MI.getParent()->getParent()) {
+ const auto MF = MI.getMF();
+ const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
+ const MCRegisterInfo &MRI = *TM.getMCRegisterInfo();
+ const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
+ const RISCVSubtarget &ST = MF->getSubtarget<RISCVSubtarget>();
+ if (isCompressibleInst(MI, &ST, MRI, STI))
+ return 2;
}
+ return get(Opcode).getSize();
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 64cd89cda06a..ee6a74b7f14f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1183,7 +1183,7 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
// destination.
// Define AsmString to print "call" when compile with -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
-let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0,
+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0,
mayStore = 0, mayLoad = 0 in
def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
let AsmString = "call\t$rd, $func";
@@ -1195,7 +1195,7 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
// if the offset fits in a signed 21-bit immediate.
// Define AsmString to print "call" when compile with -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
-let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
+let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in
def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
let AsmString = "call\t$func";
}
@@ -1220,7 +1220,7 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
// expand to auipc and jalr while encoding.
// Define AsmString to print "tail" when compile with -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
- isCodeGenOnly = 0 in
+ Size = 8, isCodeGenOnly = 0 in
def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> {
let AsmString = "tail\t$dst";
}
@@ -1235,28 +1235,28 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
-let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1,
+let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8,
isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
let AsmString = "jump\t$target, $rd";
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index ee10c3a54b2f..7d23dafb0346 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -188,6 +188,7 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
let hasSideEffects = 0;
}
+let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
@@ -242,27 +243,35 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
+let Size = 28 in
def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
PseudoMaskedAtomicSwap32>;
+let Size = 28 in
def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
PseudoMaskedAtomicLoadAdd32>;
+let Size = 28 in
def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
PseudoMaskedAtomicLoadSub32>;
+let Size = 32 in
def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
PseudoMaskedAtomicLoadNand32>;
+let Size = 44 in
def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
PseudoMaskedAtomicLoadMax32>;
+let Size = 44 in
def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
PseudoMaskedAtomicLoadMin32>;
+let Size = 36 in
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
PseudoMaskedAtomicLoadUMax32>;
+let Size = 36 in
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
PseudoMaskedAtomicLoadUMin32>;
@@ -276,6 +285,7 @@ class PseudoCmpXchg
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
+ let Size = 16;
}
// Ordering constants must be kept in sync with the AtomicOrdering enum in
@@ -304,6 +314,7 @@ def PseudoMaskedCmpXchg32
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
+ let Size = 32;
}
def : Pat<(int_riscv_masked_cmpxchg_i32
@@ -347,6 +358,7 @@ def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
/// 64-bit pseudo AMOs
+let Size = 20 in
def PseudoAtomicLoadNand64 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 4e7e251bc412..9087ed50f9fc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3836,7 +3836,7 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
multiclass VPatCompare_VI<string intrinsic, string inst,
- ImmLeaf ImmType = simm5_plus1> {
+ ImmLeaf ImmType> {
foreach vti = AllIntegerVectors in {
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX);
@@ -3899,11 +3899,13 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
foreach lmul = MxList in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1,
+ Size = !mul(4, !sub(!mul(nf, 2), 1)) in {
def "PseudoVSPILL" # nf # "_" # lmul.MX :
Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb), []>;
}
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1,
+ Size = !mul(4, !sub(!mul(nf, 2), 1)) in {
def "PseudoVRELOAD" # nf # "_" # lmul.MX :
Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb), []>;
}
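The TableGen expression above, !mul(4, !sub(!mul(nf, 2), 1)), is the same 4 * (2 * NF - 1) formula the deleted RISCVInstrInfo.cpp switch used: per the expandVSPILL/expandVRELOAD expansions, NF whole-register stores or loads interleaved with NF - 1 pointer increments, each four bytes. A quick check:

    #include <cassert>

    static unsigned zvlssegSpillSize(unsigned NF) { return 4 * (2 * NF - 1); }

    int main() {
      assert(zvlssegSpillSize(2) == 12); // store, add, store
      assert(zvlssegSpillSize(8) == 60);
      return 0;
    }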
@@ -4657,13 +4659,15 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmsgt", "PseudoVMSLT", AllIntegerVectors
defm : VPatBinarySwappedM_VV<"int_riscv_vmsgeu", "PseudoVMSLEU", AllIntegerVectors>;
defm : VPatBinarySwappedM_VV<"int_riscv_vmsge", "PseudoVMSLE", AllIntegerVectors>;
-// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
-// avoids the user needing to know that there is no vmslt(u).vi instruction.
-// Similar for vmsge(u).vx intrinsics using vmslt(u).vi.
-defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE">;
+// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16 and
+// non-zero. Zero can be .vx with x0. This avoids the user needing to know that
+// there is no vmslt(u).vi instruction. Similar for vmsge(u).vx intrinsics
+// using vmslt(u).vi.
+defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE", simm5_plus1_nonzero>;
defm : VPatCompare_VI<"int_riscv_vmsltu", "PseudoVMSLEU", simm5_plus1_nonzero>;
-defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT">;
+// We need to handle 0 for vmsge.vi using vmslt.vi because there is no vmsge.vx.
+defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT", simm5_plus1>;
defm : VPatCompare_VI<"int_riscv_vmsgeu", "PseudoVMSGTU", simm5_plus1_nonzero>;
//===----------------------------------------------------------------------===//
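The rewrite behind VPatCompare_VI is the identity x < imm <=> x <= imm - 1, applied over the simm5_plus1 range; zero is excluded for vmslt(u) because that case can use the .vx form with register x0 instead. A scalar check of the identity:

    #include <cassert>

    int main() {
      for (int X = -40; X <= 40; ++X)
        for (int Imm = -15; Imm <= 16; ++Imm)
          if (Imm != 0) // zero stays on the .vx path
            assert((X < Imm) == (X <= Imm - 1));
      return 0;
    }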
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index e452a84a9a6f..2b920d29ab81 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -539,7 +539,7 @@ defm : VPatIntegerSetCCSDNode_VV_VX_VI<SETNE, "PseudoVMSNE">;
defm : VPatIntegerSetCCSDNode_VV_VX<SETLT, "PseudoVMSLT">;
defm : VPatIntegerSetCCSDNode_VV_VX<SETULT, "PseudoVMSLTU">;
defm : VPatIntegerSetCCSDNode_VIPlus1<SETLT, "PseudoVMSLE",
- SplatPat_simm5_plus1>;
+ SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCSDNode_VIPlus1<SETULT, "PseudoVMSLEU",
SplatPat_simm5_plus1_nonzero>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 964f0fa54512..e71c498fd5f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -228,6 +228,7 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
SDTCisVT<4, XLenVT>]>;
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>;
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def SDTRVVVecReduce : SDTypeProfile<1, 5, [
@@ -832,7 +833,7 @@ foreach vti = AllIntegerVectors in {
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE", SETLT,
- SplatPat_simm5_plus1>;
+ SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLEU", SETULT,
SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSGT", SETGE,
@@ -861,6 +862,7 @@ defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;
// 12.12. Vector Widening Integer Multiply Instructions
defm : VPatBinaryWVL_VV_VX<riscv_vwmul_vl, "PseudoVWMUL">;
defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">;
+defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">;
// 12.13 Vector Single-Width Integer Multiply-Add Instructions
foreach vti = AllIntegerVectors in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index db3f5851879a..07884d35f63c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -337,13 +337,39 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
} // Predicates = [HasStdExtZba]
+let Predicates = [HasStdExtZba, IsRV64] in {
+def SLLI_UW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">,
+ Sched<[WriteShiftImm32, ReadShiftImm32]>;
+def ADD_UW : ALUW_rr<0b0000100, 0b000, "add.uw">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+def SH1ADD_UW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+def SH2ADD_UW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+def SH3ADD_UW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
+ Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
+} // Predicates = [HasStdExtZba, IsRV64]
+
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
def ROR : ALU_rr<0b0110000, 0b101, "ror">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
+
+def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
+ Sched<[WriteRotateImm, ReadRotateImm]>;
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
+def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">,
+ Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
+def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
+ Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
+
+def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
+ Sched<[WriteRotateImm32, ReadRotateImm32]>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
+
let Predicates = [HasStdExtZbs] in {
def BCLR : ALU_rr<0b0100100, 0b001, "bclr">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
@@ -353,27 +379,7 @@ def BINV : ALU_rr<0b0110100, 0b001, "binv">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
def BEXT : ALU_rr<0b0100100, 0b101, "bext">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
-} // Predicates = [HasStdExtZbs]
-
-let Predicates = [HasStdExtZbp] in {
-def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
-def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbpOrZbkx] in {
-def XPERMN : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
-def XPERMB : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
-} // Predicates = [HasStdExtZbpOrZbkx]
-
-let Predicates = [HasStdExtZbp] in {
-def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb] in
-def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
- Sched<[WriteRotateImm, ReadRotateImm]>;
-
-let Predicates = [HasStdExtZbs] in {
def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">,
@@ -385,10 +391,42 @@ def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">,
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
+def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
+def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
+
def GREVI : RVBShift_ri<0b01101, 0b101, OPC_OP_IMM, "grevi">, Sched<[]>;
def GORCI : RVBShift_ri<0b00101, 0b101, OPC_OP_IMM, "gorci">, Sched<[]>;
+
+def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
+def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
+
+def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
+def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
+
+def XPERM_H : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
+def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
+
+def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
+def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
+
+def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
+def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
+
+def XPERM_W : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
+// These instructions were named xperm.n and xperm.b in the last version of
+// the draft bit manipulation specification they were included in. However, we
+// use the mnemonics given to them in the ratified Zbkx extension.
+let Predicates = [HasStdExtZbpOrZbkx] in {
+def XPERM4 : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
+def XPERM8 : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
+} // Predicates = [HasStdExtZbpOrZbkx]
+
let Predicates = [HasStdExtZbt] in {
def CMIX : RVBTernaryR<0b11, 0b001, OPC_OP, "cmix", "$rd, $rs2, $rs1, $rs3">,
Sched<[]>;
@@ -402,6 +440,15 @@ def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
"$rd, $rs1, $rs3, $shamt">, Sched<[]>;
} // Predicates = [HasStdExtZbt]
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
+ "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
+ "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
+def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
+ "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
+} // Predicates = [HasStdExtZbt, IsRV64]
+
let Predicates = [HasStdExtZbb] in {
def CLZ : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
Sched<[WriteCLZ, ReadCLZ]>;
@@ -411,42 +458,45 @@ def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
Sched<[WriteCPOP, ReadCPOP]>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbm, IsRV64] in
-def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
- Sched<[]>;
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+ Sched<[WriteCLZ32, ReadCLZ32]>;
+def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+ Sched<[WriteCTZ32, ReadCTZ32]>;
+def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+ Sched<[WriteCPOP32, ReadCPOP32]>;
+} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbb] in {
-def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
- Sched<[WriteIALU, ReadIALU]>;
-def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
- Sched<[WriteIALU, ReadIALU]>;
+def SEXT_B : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+ Sched<[WriteIALU, ReadIALU]>;
+def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbr] in {
-def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
- Sched<[]>;
-def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
- Sched<[]>;
-def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
- Sched<[]>;
-} // Predicates = [HasStdExtZbr]
-
-let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+def CRC32_B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
Sched<[]>;
-
-let Predicates = [HasStdExtZbr] in {
-def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+def CRC32_H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
Sched<[]>;
-def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
- Sched<[]>;
-def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+def CRC32_W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
Sched<[]>;
+
+def CRC32C_B : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+ Sched<[]>;
+def CRC32C_H : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
+ Sched<[]>;
+def CRC32C_W : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+ Sched<[]>;
} // Predicates = [HasStdExtZbr]
-let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
- Sched<[]>;
+let Predicates = [HasStdExtZbr, IsRV64] in {
+def CRC32_D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+ Sched<[]>;
+
+def CRC32C_D : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
+ Sched<[]>;
+} // Predicates = [HasStdExtZbr, IsRV64]
let Predicates = [HasStdExtZbc] in {
def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">,
@@ -472,8 +522,6 @@ def MAXU : ALU_rr<0b0000101, 0b111, "maxu">,
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbp] in {
-def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
-def UNSHFL : ALU_rr<0b0000100, 0b101, "unshfl">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbe] in {
@@ -483,15 +531,31 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
} // Predicates = [HasStdExtZbe]
+let Predicates = [HasStdExtZbe, IsRV64] in {
+// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
+// bextw in the 0.93 spec.
+def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
+def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
+} // Predicates = [HasStdExtZbe, IsRV64]
+
let Predicates = [HasStdExtZbpOrZbkb] in {
def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
} // Predicates = [HasStdExtZbpOrZbkb]
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
+def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+
let Predicates = [HasStdExtZbp] in
def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
+let Predicates = [HasStdExtZbp, IsRV64] in
+def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
+
let Predicates = [HasStdExtZbm, IsRV64] in {
+def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
+ Sched<[]>;
+
def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
} // Predicates = [HasStdExtZbm, IsRV64]
@@ -500,105 +564,18 @@ let Predicates = [HasStdExtZbf] in
def BFP : ALU_rr<0b0100100, 0b111, "bfp">,
Sched<[WriteBFP, ReadBFP, ReadBFP]>;
-let Predicates = [HasStdExtZbp] in {
-def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
-def UNSHFLI : RVBShfl_ri<0b0000100, 0b101, OPC_OP_IMM, "unshfli">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
-let Predicates = [HasStdExtZba, IsRV64] in {
-def SLLIUW : RVBShift_ri<0b00001, 0b001, OPC_OP_IMM_32, "slli.uw">,
- Sched<[WriteShiftImm32, ReadShiftImm32]>;
-def ADDUW : ALUW_rr<0b0000100, 0b000, "add.uw">,
- Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
-def SH1ADDUW : ALUW_rr<0b0010000, 0b010, "sh1add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
- Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">,
- Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
-def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
- Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
-def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in
-def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
- Sched<[WriteRotateImm32, ReadRotateImm32]>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
-def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbt, IsRV64] in {
-def FSLW : RVBTernaryR<0b10, 0b001, OPC_OP_32,
- "fslw", "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
-def FSRW : RVBTernaryR<0b10, 0b101, OPC_OP_32, "fsrw",
- "$rd, $rs1, $rs3, $rs2">, Sched<[]>;
-def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
- "fsriw", "$rd, $rs1, $rs3, $shamt">, Sched<[]>;
-} // Predicates = [HasStdExtZbt, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
- Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
- Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
- Sched<[WriteCPOP32, ReadCPOP32]>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SHFLW : ALUW_rr<0b0000100, 0b001, "shflw">, Sched<[]>;
-def UNSHFLW : ALUW_rr<0b0000100, 0b101, "unshflw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbe, IsRV64] in {
-// NOTE: These mnemonics are from the 0.94 spec. There is a name conflict with
-// bextw in the 0.93 spec.
-def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
-def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
-} // Predicates = [HasStdExtZbe, IsRV64]
-
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
-def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in
-def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
-
let Predicates = [HasStdExtZbf, IsRV64] in
def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">,
Sched<[WriteBFP32, ReadBFP32, ReadBFP32]>;
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ZEXTH_RV32 : RVInstR<0b0000100, 0b100, OPC_OP, (outs GPR:$rd),
- (ins GPR:$rs1), "zext.h", "$rd, $rs1">,
- Sched<[WriteIALU, ReadIALU]> {
- let rs2 = 0b00000;
-}
+def ZEXT_H_RV32 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP, "zext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
- (ins GPR:$rs1), "zext.h", "$rd, $rs1">,
- Sched<[WriteIALU, ReadIALU]> {
- let rs2 = 0b00000;
-}
+def ZEXT_H_RV64 : RVBUnary<0b0000100, 0b00000, 0b100, OPC_OP_32, "zext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
// We treat rev8 and orc.b as standalone instructions even though they use a
@@ -619,8 +596,8 @@ def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp] in {
-def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
- Sched<[WriteORCB, ReadORCB]>;
+def ORC_B : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+ Sched<[WriteORCB, ReadORCB]>;
} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbpOrZbkb] in
@@ -637,7 +614,7 @@ def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZba, IsRV64] in {
-def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>;
+def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>;
}
let Predicates = [HasStdExtZbp] in {
@@ -775,8 +752,10 @@ def : InstAlias<"gorcw $rd, $rs1, $shamt",
// Zbp is unratified and that it would likely adopt the already ratified Zbkx names.
// Thus current Zbp instructions are defined as aliases for Zbkx instructions.
let Predicates = [HasStdExtZbp] in {
- def : InstAlias<"xperm.b $rd, $rs1, $rs2", (XPERMB GPR:$rd, GPR:$rs1, GPR:$rs2)>;
- def : InstAlias<"xperm.n $rd, $rs1, $rs2", (XPERMN GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : InstAlias<"xperm.b $rd, $rs1, $rs2",
+ (XPERM8 GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : InstAlias<"xperm.n $rd, $rs1, $rs2",
+ (XPERM4 GPR:$rd, GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbp]
let Predicates = [HasStdExtZbs] in {
@@ -803,8 +782,22 @@ def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def : PatGprGpr<rotl, ROL>;
def : PatGprGpr<rotr, ROR>;
+
+def : PatGprImm<rotr, RORI, uimmlog2xlen>;
+// There's no encoding for roli in the 'B' extension as it can be
+// implemented with rori by negating the immediate.
+def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+ (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
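The ImmSubFromXLen pattern above relies on rotl(x, s) == rotr(x, XLEN - s); a scalar check for XLEN = 64 (helper names are illustrative):

    #include <cassert>
    #include <cstdint>

    static uint64_t rotr64(uint64_t X, unsigned S) {
      return (X >> (S & 63)) | (X << ((64 - S) & 63));
    }
    static uint64_t rotl64(uint64_t X, unsigned S) {
      return (X << (S & 63)) | (X >> ((64 - S) & 63));
    }

    int main() {
      uint64_t V = 0x0123456789abcdefULL;
      for (unsigned S = 1; S < 64; ++S) // S == 0 is trivially the identity
        assert(rotl64(V, S) == rotr64(V, 64 - S));
      return 0;
    }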
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
+def : PatGprGpr<riscv_rolw, ROLW>;
+def : PatGprGpr<riscv_rorw, RORW>;
+def : PatGprImm<riscv_rorw, RORIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+ (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
+
let Predicates = [HasStdExtZbs] in {
def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
(BCLR GPR:$rs1, GPR:$rs2)>;
@@ -852,48 +845,62 @@ def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
(BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>;
}
-// There's no encoding for roli in the the 'B' extension as it can be
-// implemented with rori by negating the immediate.
-let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
-def : PatGprImm<rotr, RORI, uimmlog2xlen>;
-def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
- (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
-
+let Predicates = [HasStdExtZbbOrZbp] in {
// We treat orc.b as a separate instruction, so match it directly. We also
// lower the Zbb orc.b intrinsic to this.
-def : Pat<(riscv_gorc GPR:$rs1, 7), (ORCB GPR:$rs1)>;
+def : Pat<(riscv_gorc GPR:$rs1, 7), (ORC_B GPR:$rs1)>;
+}
+
+let Predicates = [HasStdExtZbpOrZbkb] in {
+// We treat brev8 as a separate instruction, so match it directly. We also
+// use this for brev8 when lowering bitreverse with Zbkb.
+def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
+
+// We treat zip and unzip as separate instructions, so match it directly.
+def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
+def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
}
let Predicates = [HasStdExtZbp] in {
def : PatGprGpr<riscv_grev, GREV>;
def : PatGprGpr<riscv_gorc, GORC>;
+def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
+def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
+
def : PatGprGpr<riscv_shfl, SHFL>;
def : PatGprGpr<riscv_unshfl, UNSHFL>;
-def : PatGprGpr<int_riscv_xperm_n, XPERMN>;
-def : PatGprGpr<int_riscv_xperm_b, XPERMB>;
-def : PatGprGpr<int_riscv_xperm_h, XPERMH>;
def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>;
def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>;
-def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
-def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
-// We treat brev8 as a separate instruction, so match it directly.
-def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
+def : PatGprGpr<int_riscv_xperm_n, XPERM4>;
+def : PatGprGpr<int_riscv_xperm_b, XPERM8>;
+def : PatGprGpr<int_riscv_xperm_h, XPERM_H>;
} // Predicates = [HasStdExtZbp]
+let Predicates = [HasStdExtZbp, IsRV64] in {
+def : PatGprGpr<riscv_grevw, GREVW>;
+def : PatGprGpr<riscv_gorcw, GORCW>;
+def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
+def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
+
+// FIXME: Move to DAG combine.
+def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
+def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
+
+def : PatGprGpr<riscv_shflw, SHFLW>;
+def : PatGprGpr<riscv_unshflw, UNSHFLW>;
+} // Predicates = [HasStdExtZbp, IsRV64]
+
let Predicates = [HasStdExtZbp, IsRV64] in
-def : PatGprGpr<int_riscv_xperm_w, XPERMW>;
+def : PatGprGpr<int_riscv_xperm_w, XPERM_W>;
let Predicates = [HasStdExtZbp, IsRV32] in {
+// FIXME: Move to DAG combine.
def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
// We treat rev8 as a separate instruction, so match it directly.
def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>;
-
-// We treat zip and unzip as separate instructions, so match it directly.
-def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
-def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV32]
let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -942,15 +949,34 @@ def : Pat<(riscv_fsl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
(FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbt]
+let Predicates = [HasStdExtZbt, IsRV64] in {
+def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2),
+ (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+// We can use FSRIW for FSLW by immediate if we subtract the immediate from
+// 32 and swap the operands.
+def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+ (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
+} // Predicates = [HasStdExtZbt, IsRV64]
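The FSRIW-for-FSLW substitution above rests on the funnel-shift identity fsl(a, b, s) == fsr(b, a, 32 - s); a sketch under assumed fslw/fsrw semantics for 0 < s < 32 (not the instruction definitions themselves):

    #include <cassert>
    #include <cstdint>

    static uint32_t fsl32(uint32_t A, uint32_t B, unsigned S) {
      return (A << S) | (B >> (32 - S)); // A supplies the high bits
    }
    static uint32_t fsr32(uint32_t A, uint32_t B, unsigned S) {
      return (A >> S) | (B << (32 - S)); // B supplies the high bits
    }

    int main() {
      uint32_t A = 0x89abcdef, B = 0x01234567;
      for (unsigned S = 1; S < 32; ++S)
        assert(fsl32(A, B, S) == fsr32(B, A, 32 - S));
      return 0;
    }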
+
let Predicates = [HasStdExtZbb] in {
def : PatGpr<ctlz, CLZ>;
def : PatGpr<cttz, CTZ>;
def : PatGpr<ctpop, CPOP>;
} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : PatGpr<riscv_clzw, CLZW>;
+def : PatGpr<riscv_ctzw, CTZW>;
+def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
let Predicates = [HasStdExtZbb] in {
-def : Pat<(sext_inreg GPR:$rs1, i8), (SEXTB GPR:$rs1)>;
-def : Pat<(sext_inreg GPR:$rs1, i16), (SEXTH GPR:$rs1)>;
+def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>;
+def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>;
}
let Predicates = [HasStdExtZbb] in {
@@ -968,35 +994,49 @@ let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>;
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+let Predicates = [HasStdExtZbpOrZbkb] in {
+def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbpOrZbkb]
+
let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
+def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
+ (PACK GPR:$rs1, GPR:$rs2)>;
+
+def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
+ (and GPR:$rs1, 0x000000000000FFFF)),
+ i32)),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
+ (and GPR:$rs1, 0x000000000000FFFF))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+}
+
let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
-def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
- (PACK GPR:$rs1, GPR:$rs2)>;
-
-let Predicates = [HasStdExtZbp, IsRV64] in
+let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-let Predicates = [HasStdExtZbpOrZbkb] in {
-def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
- (and GPR:$rs1, 0x00FF)),
- (PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
- (and GPR:$rs1, 0x00FF)),
- (PACKH GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbpOrZbkb]
+def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
+ (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
+ (PACKUW GPR:$rs1, GPR:$rs2)>;
+}
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
-def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV32 GPR:$rs)>;
+def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV32 GPR:$rs)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
-def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>;
+def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>;
// Pattern to exclude simm12 immediates from matching.
def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
@@ -1074,80 +1114,26 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
let Predicates = [HasStdExtZba, IsRV64] in {
def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
- (SLLIUW GPR:$rs1, uimm5:$shamt)>;
+ (SLLI_UW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
- (ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADDUW GPR:$rs, X0)>;
+ (ADD_UW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
- (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
- (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
- (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
- (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
- (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH2ADD_UW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
- (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
+ (SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
-let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-def : PatGprGpr<riscv_rolw, ROLW>;
-def : PatGprGpr<riscv_rorw, RORW>;
-def : PatGprImm<riscv_rorw, RORIW, uimm5>;
-def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
- (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
-} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
-def : Pat<(riscv_rolw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
-def : PatGprGpr<riscv_grevw, GREVW>;
-def : PatGprGpr<riscv_gorcw, GORCW>;
-def : PatGprGpr<riscv_shflw, SHFLW>;
-def : PatGprGpr<riscv_unshflw, UNSHFLW>;
-def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
-def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbt, IsRV64] in {
-def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
- (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2),
- (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt),
- (FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
-// We can use FSRIW for FSLW by immediate if we subtract the immediate from
-// 32 and swap the operands.
-def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
- (FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
-} // Predicates = [HasStdExtZbt, IsRV64]
-
-let Predicates = [HasStdExtZbb, IsRV64] in {
-def : PatGpr<riscv_clzw, CLZW>;
-def : PatGpr<riscv_ctzw, CTZW>;
-def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
-
-let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
-def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
- (and GPR:$rs1, 0x000000000000FFFF)),
- i32)),
- (PACKW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
- (and GPR:$rs1, 0x000000000000FFFF))),
- (PACKW GPR:$rs1, GPR:$rs2)>;
-}
-
-let Predicates = [HasStdExtZbp, IsRV64] in
-def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
- (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
- (PACKUW GPR:$rs1, GPR:$rs2)>;
-
-
let Predicates = [HasStdExtZbcOrZbkc] in {
def : PatGprGpr<int_riscv_clmul, CLMUL>;
def : PatGprGpr<int_riscv_clmulh, CLMULH>;
@@ -1167,17 +1153,17 @@ def : PatGprGpr<riscv_bdecompressw, BDECOMPRESSW>;
} // Predicates = [HasStdExtZbe, IsRV64]
let Predicates = [HasStdExtZbr] in {
-def : PatGpr<int_riscv_crc32_b, CRC32B>;
-def : PatGpr<int_riscv_crc32_h, CRC32H>;
-def : PatGpr<int_riscv_crc32_w, CRC32W>;
-def : PatGpr<int_riscv_crc32c_b, CRC32CB>;
-def : PatGpr<int_riscv_crc32c_h, CRC32CH>;
-def : PatGpr<int_riscv_crc32c_w, CRC32CW>;
+def : PatGpr<int_riscv_crc32_b, CRC32_B>;
+def : PatGpr<int_riscv_crc32_h, CRC32_H>;
+def : PatGpr<int_riscv_crc32_w, CRC32_W>;
+def : PatGpr<int_riscv_crc32c_b, CRC32C_B>;
+def : PatGpr<int_riscv_crc32c_h, CRC32C_H>;
+def : PatGpr<int_riscv_crc32c_w, CRC32C_W>;
} // Predicates = [HasStdExtZbr]
let Predicates = [HasStdExtZbr, IsRV64] in {
-def : PatGpr<int_riscv_crc32_d, CRC32D>;
-def : PatGpr<int_riscv_crc32c_d, CRC32CD>;
+def : PatGpr<int_riscv_crc32_d, CRC32_D>;
+def : PatGpr<int_riscv_crc32c_d, CRC32C_D>;
} // Predicates = [HasStdExtZbr, IsRV64]
let Predicates = [HasStdExtZbf] in
@@ -1186,16 +1172,7 @@ def : PatGprGpr<riscv_bfp, BFP>;
let Predicates = [HasStdExtZbf, IsRV64] in
def : PatGprGpr<riscv_bfpw, BFPW>;
-let Predicates = [HasStdExtZbkb] in {
-def : PatGpr<int_riscv_brev8, BREV8>;
-} // Predicates = [HasStdExtZbkb]
-
-let Predicates = [HasStdExtZbkb, IsRV32] in {
-def : PatGpr<int_riscv_zip, ZIP_RV32>;
-def : PatGpr<int_riscv_unzip, UNZIP_RV32>;
-} // Predicates = [HasStdExtZbkb, IsRV32]
-
let Predicates = [HasStdExtZbkx] in {
-def : PatGprGpr<int_riscv_xperm4, XPERMN>;
-def : PatGprGpr<int_riscv_xperm8, XPERMB>;
+def : PatGprGpr<int_riscv_xperm4, XPERM4>;
+def : PatGprGpr<int_riscv_xperm8, XPERM8>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index dfd0c74ee26c..a2753c132354 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -29,14 +29,14 @@ def riscv_fmv_x_anyexth
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
@@ -98,7 +98,7 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
@@ -113,7 +113,7 @@ def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
let mayRaiseFPException = 0 in
def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
@@ -146,23 +146,23 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
+let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
-} // Predicates = [HasStdExtZfhmin, HasStdExtD]
+} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh] in {
def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
@@ -177,14 +177,14 @@ def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
let usesCustomInserter = 1 in {
def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>;
def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
}
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -281,7 +281,7 @@ def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>;
def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhmin] in {
+let Predicates = [HasStdExtZfhOrZfhmin] in {
/// Loads
defm : LdPat<load, FLH, f16>;
@@ -299,7 +299,7 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
-} // Predicates = [HasStdExtZfhmin]
+} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
@@ -351,7 +351,7 @@ def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
+let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
@@ -361,4 +361,4 @@ def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
-} // Predicates = [HasStdExtZfhmin, HasStdExtD]
+} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index 4a41cddedc71..e4e07f4789a6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -1,4 +1,4 @@
-//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions - tablegen -*===//
+//===- RISCVInstrInfoZk.td - RISC-V 'Zk' instructions ------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
index 12ec52925798..715d92b036e3 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -99,9 +99,9 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::SLTI:
case RISCV::SLTU:
case RISCV::SLTIU:
- case RISCV::SEXTB:
- case RISCV::SEXTH:
- case RISCV::ZEXTH_RV64:
+ case RISCV::SEXT_B:
+ case RISCV::SEXT_H:
+ case RISCV::ZEXT_H_RV64:
return true;
// shifting right sufficiently makes the value 32-bit sign-extended
case RISCV::SRAI:
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 044dda0a1ccc..34c6e8e684ac 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -195,6 +195,7 @@ public:
return 0;
}
+ unsigned getMinVLen() const { return ZvlLen; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isRegisterReservedByUser(Register i) const {
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index e950f9582f09..4d69040a4508 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -8,6 +8,7 @@
#include "MCTargetDesc/SparcFixupKinds.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -131,6 +132,23 @@ namespace {
return Sparc::NumTargetFixupKinds;
}
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
+ unsigned Type;
+ Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/Sparc.def"
+#undef ELF_RELOC
+ .Case("BFD_RELOC_NONE", ELF::R_SPARC_NONE)
+ .Case("BFD_RELOC_8", ELF::R_SPARC_8)
+ .Case("BFD_RELOC_16", ELF::R_SPARC_16)
+ .Case("BFD_RELOC_32", ELF::R_SPARC_32)
+ .Case("BFD_RELOC_64", ELF::R_SPARC_64)
+ .Default(-1u);
+ if (Type == -1u)
+ return None;
+ return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+ }
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo InfosBE[Sparc::NumTargetFixupKinds] = {
// name offset bits flags
@@ -216,6 +234,11 @@ namespace {
{ "fixup_sparc_tls_le_lox10", 0, 0, 0 }
};
+ // Fixup kinds from .reloc directive are like R_SPARC_NONE. They do
+ // not require any extra processing.
+ if (Kind >= FirstLiteralRelocationKind)
+ return MCAsmBackend::getFixupKindInfo(FK_NONE);
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -229,6 +252,8 @@ namespace {
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return true;
switch ((Sparc::Fixups)Fixup.getKind()) {
default:
return false;
@@ -299,6 +324,8 @@ namespace {
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override {
+ if (Fixup.getKind() >= FirstLiteralRelocationKind)
+ return;
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index bc508b45c3bd..02261dc5c4cd 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -42,6 +42,9 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ MCFixupKind Kind = Fixup.getKind();
+ if (Kind >= FirstLiteralRelocationKind)
+ return Kind - FirstLiteralRelocationKind;
if (const SparcMCExpr *SExpr = dyn_cast<SparcMCExpr>(Fixup.getValue())) {
if (SExpr->getKind() == SparcMCExpr::VK_Sparc_R_DISP32)
@@ -68,6 +71,7 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
switch(Fixup.getTargetKind()) {
default:
llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_NONE: return ELF::R_SPARC_NONE;
case FK_Data_1: return ELF::R_SPARC_8;
case FK_Data_2: return ((Fixup.getOffset() % 2)
? ELF::R_SPARC_UA16
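The `.reloc` support added above follows the convention shared by other LLVM backends: a relocation named in a `.reloc` directive is encoded by getFixupKind() as a fixup kind at or above FirstLiteralRelocationKind, passed through applyFixup() untouched, and decoded back to the raw ELF relocation type by the object writer. A minimal sketch of that round trip (simplified free functions, not the actual patch code):

#include "llvm/MC/MCFixup.h"
#include <cassert>

// Encode: a literal relocation is the raw ELF type offset into the fixup
// kind space reserved above FirstLiteralRelocationKind.
static llvm::MCFixupKind encodeLiteralReloc(unsigned ElfRelocType) {
  return static_cast<llvm::MCFixupKind>(llvm::FirstLiteralRelocationKind +
                                        ElfRelocType);
}

// Decode: the object writer recovers the ELF relocation type unchanged,
// which is why getRelocType() above returns Kind - FirstLiteralRelocationKind.
static unsigned decodeLiteralReloc(llvm::MCFixupKind Kind) {
  assert(Kind >= llvm::FirstLiteralRelocationKind && "not a literal reloc");
  return Kind - llvm::FirstLiteralRelocationKind;
}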
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index ccc7d0737f53..610627e7e3f0 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -80,6 +80,88 @@ MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
}
}
+namespace {
+struct SZFrameSortingObj {
+ bool IsValid = false; // True if we care about this Object.
+ uint32_t ObjectIndex = 0; // Index of Object into MFI list.
+ uint64_t ObjectSize = 0; // Size of Object in bytes.
+ uint32_t D12Count = 0; // 12-bit displacement only.
+ uint32_t DPairCount = 0; // 12 or 20 bit displacement.
+};
+typedef std::vector<SZFrameSortingObj> SZFrameObjVec;
+} // namespace
+
+// TODO: Move to base class.
+void SystemZELFFrameLowering::orderFrameObjects(
+ const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // Make a vector of sorting objects to track all MFI objects and mark those
+ // to be sorted as valid.
+ if (ObjectsToAllocate.size() <= 1)
+ return;
+ SZFrameObjVec SortingObjects(MFI.getObjectIndexEnd());
+ for (auto &Obj : ObjectsToAllocate) {
+ SortingObjects[Obj].IsValid = true;
+ SortingObjects[Obj].ObjectIndex = Obj;
+ SortingObjects[Obj].ObjectSize = MFI.getObjectSize(Obj);
+ }
+
+ // Examine uses for each object and record short (12-bit) and "pair"
+ // displacement types.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isFI())
+ continue;
+ int Index = MO.getIndex();
+ if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
+ SortingObjects[Index].IsValid) {
+ if (TII->hasDisplacementPairInsn(MI.getOpcode()))
+ SortingObjects[Index].DPairCount++;
+ else if (!(MI.getDesc().TSFlags & SystemZII::Has20BitOffset))
+ SortingObjects[Index].D12Count++;
+ }
+ }
+ }
+
+ // Sort all objects for short/paired displacements, which should be
+ // sufficient since frame objects typically all fall within the long
+ // displacement range. Sorting works by computing the "density" as
+ // Count / ObjectSize. The comparison of two such fractions is refactored
+ // by multiplying both sides with A.ObjectSize * B.ObjectSize, in order to
+ // eliminate the (fp) divisions. A higher density object needs to go later
+ // in the list in order for it to end up lower on the stack.
+ auto CmpD12 = [](const SZFrameSortingObj &A, const SZFrameSortingObj &B) {
+ // Put all invalid and variable sized objects at the end.
+ if (!A.IsValid || !B.IsValid)
+ return A.IsValid;
+ if (!A.ObjectSize || !B.ObjectSize)
+ return A.ObjectSize > 0;
+ uint64_t ADensityCmp = A.D12Count * B.ObjectSize;
+ uint64_t BDensityCmp = B.D12Count * A.ObjectSize;
+ if (ADensityCmp != BDensityCmp)
+ return ADensityCmp < BDensityCmp;
+ return A.DPairCount * B.ObjectSize < B.DPairCount * A.ObjectSize;
+ };
+ std::stable_sort(SortingObjects.begin(), SortingObjects.end(), CmpD12);
+
+ // Now modify the original list to represent the final order that
+ // we want.
+ unsigned Idx = 0;
+ for (auto &Obj : SortingObjects) {
+ // All invalid items are sorted at the end, so it's safe to stop.
+ if (!Obj.IsValid)
+ break;
+ ObjectsToAllocate[Idx++] = Obj.ObjectIndex;
+ }
+}
+
bool SystemZFrameLowering::hasReservedCallFrame(
const MachineFunction &MF) const {
// The ELF ABI requires us to allocate 160 bytes of stack space for the
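The comparator above works because, for positive sizes, A.D12Count / A.ObjectSize < B.D12Count / B.ObjectSize exactly when A.D12Count * B.ObjectSize < B.D12Count * A.ObjectSize, so the densities can be ordered with integer multiplies only. A worked sketch with hypothetical numbers (not from the patch):

#include <cstdint>

// Object A: 2 short-displacement uses over 8 bytes  -> density 0.25
// Object B: 3 short-displacement uses over 24 bytes -> density 0.125
uint64_t AD12 = 2, ASize = 8;
uint64_t BD12 = 3, BSize = 24;
// Cross-multiplied: 2 * 24 = 48 vs. 3 * 8 = 24. A's density is higher, so
// the comparator orders A after B, and A ends up lower on the stack where
// short displacements can still reach it.
bool ABeforeB = (AD12 * BSize) < (BD12 * ASize); // false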
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 3a1af888d8f9..2b3d7efed53b 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -77,6 +77,9 @@ public:
bool hasFP(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
+ void
+ orderFrameObjects(const MachineFunction &MF,
+ SmallVectorImpl<int> &ObjectsToAllocate) const override;
// Return the byte offset from the incoming stack pointer of Reg's
// ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index a8ddb8c62d18..de446f33f5f1 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -443,6 +443,11 @@ public:
EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
+ bool ShouldShrinkFPConstant(EVT VT) const override {
+ // Do not shrink 64-bit FP constpool entries since LDEB is slower than
+ // LD, and having the full constant in memory enables reg/mem opcodes.
+ return VT != MVT::f64;
+ }
bool hasInlineStackProbe(MachineFunction &MF) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6db9bf3056b7..4b6aa60f5d55 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1652,6 +1652,13 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
return 0;
}
+bool SystemZInstrInfo::hasDisplacementPairInsn(unsigned Opcode) const {
+ const MCInstrDesc &MCID = get(Opcode);
+ if (MCID.TSFlags & SystemZII::Has20BitOffset)
+ return SystemZ::getDisp12Opcode(Opcode) >= 0;
+ return SystemZ::getDisp20Opcode(Opcode) >= 0;
+}
+
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
case SystemZ::L: return SystemZ::LT;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 396f56c7f59c..9e5b2729a707 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -312,6 +312,9 @@ public:
// exists.
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+ // Return true if Opcode has a mapping between 12-bit and 20-bit
+ // displacement forms.
+ bool hasDisplacementPairInsn(unsigned Opcode) const;
+
// If Opcode is a load instruction that has a LOAD AND TEST form,
// return the opcode for the testing form, otherwise return 0.
unsigned getLoadAndTest(unsigned Opcode) const;
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index 0412e524f800..0f1655718481 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -167,3 +167,41 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) {
llvm_unreachable("unexpected type");
}
}
+
+void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
+ const SmallVector<MVT, 1> &VTs) {
+ assert(!Sym->getType());
+
+ // Tables are represented as Arrays in LLVM IR; therefore they reach this
+ // point as aggregate Array types with an element type that is a reference
+ // type.
+ wasm::ValType Type;
+ bool IsTable = false;
+ if (GlobalVT->isArrayTy() &&
+ WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
+ MVT VT;
+ IsTable = true;
+ switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
+ VT = MVT::funcref;
+ break;
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
+ VT = MVT::externref;
+ break;
+ default:
+ report_fatal_error("unhandled address space type");
+ }
+ Type = WebAssembly::toValType(VT);
+ } else if (VTs.size() == 1) {
+ Type = WebAssembly::toValType(VTs[0]);
+ } else
+ report_fatal_error("Aggregate globals not yet implemented");
+
+ if (IsTable) {
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
+ Sym->setTableType(Type);
+ } else {
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
+ }
+}
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 042d51c7d6cb..cdb95d48398d 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -17,6 +17,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -41,6 +43,43 @@ enum class BlockType : unsigned {
Multivalue = 0xffff,
};
+enum WasmAddressSpace : unsigned {
+ // Default address space, for pointers to linear memory (stack, heap, data).
+ WASM_ADDRESS_SPACE_DEFAULT = 0,
+ // A non-integral address space for pointers to named objects outside of
+ // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
+ // to these pointers are lowered to global.get / global.set or local.get /
+ // local.set, as appropriate.
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
+};
+
+inline bool isDefaultAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_DEFAULT;
+}
+inline bool isWasmVarAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_VAR;
+}
+inline bool isValidAddressSpace(unsigned AS) {
+ return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
+}
+inline bool isFuncrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+}
+inline bool isExternrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
+}
+inline bool isRefType(const Type *Ty) {
+ return isFuncrefType(Ty) || isExternrefType(Ty);
+}
+
// Convert StringRef to ValType / HeapType / BlockType
Optional<wasm::ValType> parseType(StringRef Type);
@@ -68,6 +107,10 @@ wasm::ValType toValType(MVT Type);
// Convert a register class to a wasm ValType.
wasm::ValType regClassToValType(unsigned RC);
+/// Sets a Wasm Symbol Type.
+void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
+ const SmallVector<MVT, 1> &VTs);
+
} // end namespace WebAssembly
} // end namespace llvm
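With the helpers moved into this header, a caller can classify a global the same way wasmSymbolSetType() does. A small usage sketch (hypothetical function, assuming only this header):

#include "Utils/WebAssemblyTypeUtilities.h"

// A Wasm table reaches the backend as an LLVM array whose element type is a
// reference-typed pointer (funcref or externref address space).
static bool isWasmTableType(const llvm::Type *GlobalVT) {
  return GlobalVT->isArrayTy() &&
         llvm::WebAssembly::isRefType(GlobalVT->getArrayElementType());
}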
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index 57e40f6cd8d7..cdfc758db7ac 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -15,7 +15,6 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/CommandLine.h"
namespace llvm {
@@ -30,43 +29,6 @@ class WebAssemblySubtarget;
namespace WebAssembly {
-enum WasmAddressSpace : unsigned {
- // Default address space, for pointers to linear memory (stack, heap, data).
- WASM_ADDRESS_SPACE_DEFAULT = 0,
- // A non-integral address space for pointers to named objects outside of
- // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
- // to these pointers are lowered to global.get / global.set or local.get /
- // local.set, as appropriate.
- WASM_ADDRESS_SPACE_VAR = 1,
- // A non-integral address space for externref values
- WASM_ADDRESS_SPACE_EXTERNREF = 10,
- // A non-integral address space for funcref values
- WASM_ADDRESS_SPACE_FUNCREF = 20,
-};
-
-inline bool isDefaultAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_DEFAULT;
-}
-inline bool isWasmVarAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_VAR;
-}
-inline bool isValidAddressSpace(unsigned AS) {
- return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
-}
-inline bool isFuncrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
-}
-inline bool isExternrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
-}
-inline bool isRefType(const Type *Ty) {
- return isFuncrefType(Ty) || isExternrefType(Ty);
-}
-
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index e3af6b2662ef..bf326e5106be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -181,17 +181,11 @@ void WebAssemblyAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (!Sym->getType()) {
const WebAssemblyTargetLowering &TLI = *Subtarget->getTargetLowering();
- SmallVector<EVT, 1> VTs;
- ComputeValueVTs(TLI, GV->getParent()->getDataLayout(), GV->getValueType(),
- VTs);
- if (VTs.size() != 1 ||
- TLI.getNumRegisters(GV->getParent()->getContext(), VTs[0]) != 1)
- report_fatal_error("Aggregate globals not yet implemented");
- MVT VT = TLI.getRegisterType(GV->getParent()->getContext(), VTs[0]);
- bool Mutable = true;
- wasm::ValType Type = WebAssembly::toValType(VT);
- Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- Sym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable});
+ SmallVector<MVT, 1> VTs;
+ Type *GlobalVT = GV->getValueType();
+ computeLegalValueVTs(TLI, GV->getParent()->getContext(),
+ GV->getParent()->getDataLayout(), GlobalVT, VTs);
+ WebAssembly::wasmSymbolSetType(Sym, GlobalVT, VTs);
}
// If the GlobalVariable refers to a table, we handle it here instead of
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 406edef8ff3f..8ddd414b043a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -16,6 +16,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index c45f7d7176b5..01baa3d9389d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -19,7 +19,7 @@
#include "WebAssemblyFrameLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyUtilities.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index fe656753889f..b6c43be03aba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -560,6 +560,9 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
NEltArg = NEltArg.getValue() + 1;
FnAttrs.addAllocSizeAttr(SizeArg, NEltArg);
}
+ // In case the callee has the 'noreturn' attribute, we need to remove it,
+ // because we expect invoke wrappers to return.
+ FnAttrs.removeAttribute(Attribute::NoReturn);
// Reconstruct the AttributesList based on the vector we constructed.
AttributeList NewCallAL = AttributeList::get(
@@ -630,9 +633,9 @@ static bool canLongjmp(const Value *Callee) {
// Exception-catching related functions
//
- // We intentionally excluded __cxa_end_catch here even though it surely cannot
- // longjmp, in order to maintain the unwind relationship from all existing
- // catchpads (and calls within them) to catch.dispatch.longjmp.
+ // We intentionally treat __cxa_end_catch as longjmpable in Wasm SjLj even though
+ // it surely cannot longjmp, in order to maintain the unwind relationship from
+ // all existing catchpads (and calls within them) to catch.dispatch.longjmp.
//
// In Wasm EH + Wasm SjLj, we
// 1. Make all catchswitch and cleanuppad that unwind to caller unwind to
@@ -663,6 +666,8 @@ static bool canLongjmp(const Value *Callee) {
//
// The comment block in findWasmUnwindDestinations() in
// SelectionDAGBuilder.cpp is addressing a similar problem.
+ if (CalleeName == "__cxa_end_catch")
+ return WebAssembly::WasmEnableSjLj;
if (CalleeName == "__cxa_begin_catch" ||
CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
CalleeName == "__clang_call_terminate")
@@ -869,15 +874,17 @@ static void nullifySetjmp(Function *F) {
Function *SetjmpF = M.getFunction("setjmp");
SmallVector<Instruction *, 1> ToErase;
- for (User *U : SetjmpF->users()) {
- auto *CI = dyn_cast<CallInst>(U);
- // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
- // we don't support two being used together yet.
- if (!CI)
- report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
- BasicBlock *BB = CI->getParent();
+ for (User *U : make_early_inc_range(SetjmpF->users())) {
+ auto *CB = cast<CallBase>(U);
+ BasicBlock *BB = CB->getParent();
if (BB->getParent() != F) // in other function
continue;
+ CallInst *CI = nullptr;
+ // setjmp cannot throw. So if it is an invoke, lower it to a call
+ if (auto *II = dyn_cast<InvokeInst>(CB))
+ CI = llvm::changeToCall(II);
+ else
+ CI = cast<CallInst>(CB);
ToErase.push_back(CI);
CI->replaceAllUsesWith(IRB.getInt32(0));
}
@@ -1313,10 +1320,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
SmallVector<PHINode *, 4> SetjmpRetPHIs;
Function *SetjmpF = M.getFunction("setjmp");
for (auto *U : make_early_inc_range(SetjmpF->users())) {
- auto *CB = dyn_cast<CallBase>(U);
+ auto *CB = cast<CallBase>(U);
BasicBlock *BB = CB->getParent();
if (BB->getParent() != &F) // in other function
continue;
+ if (CB->getOperandBundle(LLVMContext::OB_funclet))
+ report_fatal_error(
+ "setjmp within a catch clause is not supported in Wasm EH");
CallInst *CI = nullptr;
// setjmp cannot throw. So if it is an invoke, lower it to a call
@@ -1815,10 +1825,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
BasicBlock *UnwindDest = nullptr;
if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]);
- while (!UnwindDest && FromPad) {
+ while (!UnwindDest) {
if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) {
UnwindDest = CPI->getCatchSwitch()->getUnwindDest();
- FromPad = nullptr; // stop searching
+ break;
} else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
// getCleanupRetUnwindDest() can return nullptr when
// 1. This cleanuppad's matching cleanupret uwninds to caller
@@ -1826,7 +1836,10 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
// unreachable.
// In case of 2, we need to traverse the parent pad chain.
UnwindDest = getCleanupRetUnwindDest(CPI);
- FromPad = cast<Instruction>(CPI->getParentPad());
+ Value *ParentPad = CPI->getParentPad();
+ if (isa<ConstantTokenNone>(ParentPad))
+ break;
+ FromPad = cast<Instruction>(ParentPad);
}
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
index 8ff916c28c4e..6fd87f10150d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -14,7 +14,7 @@
///
//===----------------------------------------------------------------------===//
-#include "Utils/WebAssemblyUtilities.h"
+#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "llvm/IR/InstIterator.h"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 09bccef17ab0..2e6027a5605c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -59,39 +59,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
SmallVector<MVT, 1> VTs;
computeLegalValueVTs(CurrentFunc, TM, GlobalVT, VTs);
- // Tables are represented as Arrays in LLVM IR therefore
- // they reach this point as aggregate Array types with an element type
- // that is a reference type.
- wasm::ValType Type;
- bool IsTable = false;
- if (GlobalVT->isArrayTy() &&
- WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
- MVT VT;
- IsTable = true;
- switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
- VT = MVT::funcref;
- break;
- case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
- VT = MVT::externref;
- break;
- default:
- report_fatal_error("unhandled address space type");
- }
- Type = WebAssembly::toValType(VT);
- } else if (VTs.size() == 1) {
- Type = WebAssembly::toValType(VTs[0]);
- } else
- report_fatal_error("Aggregate globals not yet implemented");
-
- if (IsTable) {
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
- WasmSym->setTableType(Type);
- } else {
- WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(
- wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
- }
+ WebAssembly::wasmSymbolSetType(WasmSym, GlobalVT, VTs);
}
return WasmSym;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 00b11321fdb2..ea80e96d50de 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -30,22 +30,28 @@ void WebAssemblyFunctionInfo::initWARegs(MachineRegisterInfo &MRI) {
WARegs.resize(MRI.getNumVirtRegs(), Reg);
}
-void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
+void llvm::computeLegalValueVTs(const WebAssemblyTargetLowering &TLI,
+ LLVMContext &Ctx, const DataLayout &DL,
Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
- const DataLayout &DL(F.getParent()->getDataLayout());
- const WebAssemblyTargetLowering &TLI =
- *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
SmallVector<EVT, 4> VTs;
ComputeValueVTs(TLI, DL, Ty, VTs);
for (EVT VT : VTs) {
- unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
- MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(Ctx, VT);
+ MVT RegisterVT = TLI.getRegisterType(Ctx, VT);
for (unsigned I = 0; I != NumRegs; ++I)
ValueVTs.push_back(RegisterVT);
}
}
+void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+ const DataLayout &DL(F.getParent()->getDataLayout());
+ const WebAssemblyTargetLowering &TLI =
+ *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+ computeLegalValueVTs(TLI, F.getContext(), DL, Ty, ValueVTs);
+}
+
void llvm::computeSignatureVTs(const FunctionType *Ty,
const Function *TargetFunc,
const Function &ContextFunc,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 3fa2d0c8a2f2..413d0d1dc554 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -166,6 +166,10 @@ public:
void setWasmEHFuncInfo(WasmEHFuncInfo *Info) { WasmEHInfo = Info; }
};
+void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI,
+ LLVMContext &Ctx, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<MVT> &ValueVTs);
+
void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
SmallVectorImpl<MVT> &ValueVTs);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aff72452af6c..90753b5b4d33 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -805,8 +805,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
// Some FP actions are always expanded for vector types.
- for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
- MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
+ MVT::v4f32, MVT::v8f32, MVT::v16f32,
+ MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
@@ -1094,13 +1095,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (VT == MVT::v2i64) continue;
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
- setOperationAction(ISD::FSHL, MVT::v16i8, Custom);
- setOperationAction(ISD::FSHR, MVT::v16i8, Custom);
- setOperationAction(ISD::FSHL, MVT::v4i32, Custom);
- setOperationAction(ISD::FSHR, MVT::v4i32, Custom);
-
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
@@ -1958,6 +1956,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
addRegisterClass(MVT::f16, &X86::FR16XRegClass);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
@@ -12571,6 +12571,8 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
+ unsigned NumElts = VT.getVectorNumElements();
+
switch (VT.SimpleTy) {
case MVT::v4i64:
case MVT::v8i32:
@@ -12629,8 +12631,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return Masked;
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
- MVT IntegerType =
- MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
@@ -12699,8 +12700,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// Otherwise load an immediate into a GPR, cast to k-register, and use a
// masked move.
- MVT IntegerType =
- MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ MVT IntegerType = MVT::getIntegerVT(std::max<unsigned>(NumElts, 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
}
@@ -29843,7 +29843,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
- VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
+ VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
+ VT == MVT::v16i32) &&
"Unexpected funnel shift type!");
// fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw.
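The identity in the comment above is easiest to check on scalars: concatenate the two sources into one double-width lane, shift the wide value by the masked amount, then keep the high half. A one-lane sketch for 8-bit elements (hypothetical helper, not part of the patch):

#include <cstdint>

// fshl(x, y, z) on a single 8-bit lane via a 16-bit "unpacked" value.
static uint8_t fshl8(uint8_t x, uint8_t y, unsigned z) {
  z &= 7;                                  // z & (bw - 1)
  uint16_t Wide = (uint16_t(x) << 8) | y;  // unpack(y, x): x in the high half
  return uint8_t((Wide << z) >> 8);        // shift left, take the high byte
}
// fshl8(0x12, 0x34, 4) == 0x23: the top 4 bits of y rotate in behind x.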
@@ -29855,6 +29856,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode());
+ // Constant vXi16 funnel shifts can be efficiently handled by default.
+ if (IsCst && EltSizeInBits == 16)
+ return SDValue();
+
unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
unsigned NumElts = VT.getVectorNumElements();
MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
@@ -29874,6 +29879,10 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
+ // Uniform vXi16 funnel shifts can be efficiently handled by default.
+ if (EltSizeInBits == 16)
+ return SDValue();
+
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
@@ -29912,7 +29921,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
}
// Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)
- if ((IsCst && !IsFSHR && EltSizeInBits == 8) ||
+ if (((IsCst || !Subtarget.hasAVX512()) && !IsFSHR && EltSizeInBits <= 16) ||
supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
SDValue Z = DAG.getConstant(0, DL, VT);
SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
@@ -36477,9 +36486,8 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool AllowFloatDomain, bool AllowIntDomain,
- SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
- const X86Subtarget &Subtarget, unsigned &Shuffle,
- MVT &SrcVT, MVT &DstVT) {
+ SDValue V1, const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
@@ -36522,9 +36530,6 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
- V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
-
Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
if (SrcVT.getVectorNumElements() != NumDstElts)
Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);
@@ -37102,6 +37107,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
"Unexpected number of shuffle inputs!");
+ SDLoc DL(Root);
MVT RootVT = Root.getSimpleValueType();
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned NumRootElts = RootVT.getVectorNumElements();
@@ -37109,6 +37115,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Canonicalize shuffle input op to the requested type.
// TODO: Support cases where Op is smaller than VT.
auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {
+ if (VT.getSizeInBits() < Op.getValueSizeInBits())
+ Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits());
return DAG.getBitcast(VT, Op);
};
@@ -37124,7 +37132,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
assert(VT1.getSizeInBits() == RootSizeInBits &&
VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch");
- SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
@@ -37393,15 +37400,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- SDValue NewV1 = V1; // Save operand in case early exit happens.
- if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
- DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT) &&
+ if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1,
+ Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) &&
(!IsMaskedShuffle ||
(NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1);
+ Res = CanonicalizeShuffleInput(ShuffleSrcVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
return DAG.getBitcast(RootVT, Res);
}
@@ -40903,6 +40908,28 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Known.One.setHighBits(ShAmt);
return false;
}
+ case X86ISD::BLENDV: {
+ SDValue Sel = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ SDValue NewSel = SimplifyMultipleUseDemandedBits(
+ Sel, SignMask, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+ LHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+ RHS, OriginalDemandedBits, OriginalDemandedElts, TLO.DAG, Depth + 1);
+
+ if (NewSel || NewLHS || NewRHS) {
+ NewSel = NewSel ? NewSel : Sel;
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT,
+ NewSel, NewLHS, NewRHS));
+ }
+ break;
+ }
case X86ISD::PEXTRB:
case X86ISD::PEXTRW: {
SDValue Vec = Op.getOperand(0);
@@ -41043,6 +41070,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+ // See if we only demand bits from the lower 128-bit vector.
+ if (SrcVT.is256BitVector() &&
+ OriginalDemandedBits.getActiveBits() <= (NumElts / 2)) {
+ SDValue NewSrc = extract128BitVector(Src, 0, TLO.DAG, SDLoc(Src));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
+ }
+
// Only demand the vector elements of the sign bits we need.
APInt KnownUndef, KnownZero;
APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
@@ -42238,19 +42272,14 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
- // For all_of(setcc(x,y,eq))
- // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
- // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
+ // For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()).
if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
ISD::CondCode::SETEQ) {
- SDValue Vec = Match.getOperand(0);
- EVT VecSVT = Vec.getValueType().getScalarType();
- if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) ||
- (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) {
- NumElts *= 2;
- VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext());
- EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts);
+ EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
+ if (VecSVT != MVT::i8) {
+ NumElts *= VecSVT.getSizeInBits() / 8;
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts);
MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
Match = DAG.getSetCC(
DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
@@ -43079,6 +43108,38 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}
+ // If this extract is from a loaded vector value and will be used as an
+ // integer, that requires a potentially expensive XMM -> GPR transfer.
+ // Additionally, if we can convert to a scalar integer load, that will likely
+ // be folded into a subsequent integer op.
+ // Note: Unlike the related fold for this in DAGCombiner, this is not limited
+ // to a single-use of the loaded vector. For the reasons above, we
+ // expect this to be profitable even if it creates an extra load.
+ bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
+ return Use->getOpcode() == ISD::STORE ||
+ Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+ Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
+ });
+ auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
+ if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
+ SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
+ !LikelyUsedAsVector) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue NewPtr =
+ TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
+ unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
+ MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
+ Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
+ SDValue Load =
+ DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
+ LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
+ SDValue Chain = Load.getValue(1);
+ SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)};
+ SDValue To[] = {Load, Chain};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ return SDValue(N, 0);
+ }
+
return SDValue();
}
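To make the offset arithmetic in the new fold concrete: extracting lane CIdx of a loaded vector becomes a plain scalar load at byte offset EltBits * CIdx / 8, with the alignment reduced to whatever still divides that offset. A sketch with hypothetical values (not from the patch):

#include "llvm/Support/Alignment.h"

// Lane 3 of a 16-byte-aligned <4 x i32> load becomes an i32 load at +12.
unsigned EltBits = 32, Lane = 3;
unsigned PtrOff = EltBits * Lane / 8;                              // 12
llvm::Align NewAlign = llvm::commonAlignment(llvm::Align(16), 12); // Align(4)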
@@ -44467,8 +44528,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
unsigned NumEltBits = VecVT.getScalarSizeInBits();
bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero();
- bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
- CmpVal.isMask(NumElts);
+ bool IsAllOf = (CmpOpcode == X86ISD::SUB || CmpOpcode == X86ISD::CMP) &&
+ NumElts <= CmpBits && CmpVal.isMask(NumElts);
if (!IsAnyOf && !IsAllOf)
return SDValue();
@@ -44500,14 +44561,16 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
// MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
// MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
- if (VecVT.is256BitVector()) {
+ if (VecVT.is256BitVector() && NumElts <= CmpBits) {
SmallVector<SDValue> Ops;
if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
Ops.size() == 2) {
SDLoc DL(EFLAGS);
- EVT SubVT = Ops[0].getValueType();
+ EVT SubVT = Ops[0].getValueType().changeTypeToInteger();
APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
- SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+ SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT,
+ DAG.getBitcast(SubVT, Ops[0]),
+ DAG.getBitcast(SubVT, Ops[1]));
V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
@@ -44522,26 +44585,29 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (IsAllOf && Subtarget.hasSSE41()) {
MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
- if (BC.getOpcode() == X86ISD::PCMPEQ) {
- SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
- BC.getOperand(0), BC.getOperand(1));
- V = DAG.getBitcast(TestVT, V);
- return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
- }
- // Check for 256-bit split vector cases.
- if (BC.getOpcode() == ISD::AND &&
- BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
- BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
- SDValue LHS = BC.getOperand(0);
- SDValue RHS = BC.getOperand(1);
- LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
- LHS.getOperand(0), LHS.getOperand(1));
- RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
- RHS.getOperand(0), RHS.getOperand(1));
- LHS = DAG.getBitcast(TestVT, LHS);
- RHS = DAG.getBitcast(TestVT, RHS);
- SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
- return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ // Ensure MOVMSK was testing every signbit of BC.
+ if (BC.getValueType().getVectorNumElements() <= NumElts) {
+ if (BC.getOpcode() == X86ISD::PCMPEQ) {
+ SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+ BC.getOperand(0), BC.getOperand(1));
+ V = DAG.getBitcast(TestVT, V);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
+ // Check for 256-bit split vector cases.
+ if (BC.getOpcode() == ISD::AND &&
+ BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
+ BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
+ SDValue LHS = BC.getOperand(0);
+ SDValue RHS = BC.getOperand(1);
+ LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+ LHS.getOperand(0), LHS.getOperand(1));
+ RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+ RHS.getOperand(0), RHS.getOperand(1));
+ LHS = DAG.getBitcast(TestVT, LHS);
+ RHS = DAG.getBitcast(TestVT, RHS);
+ SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
}
}
@@ -44575,7 +44641,8 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
SDLoc DL(EFLAGS);
SDValue Result = peekThroughBitcasts(Src);
- if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+ if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ &&
+ Result.getValueType().getVectorNumElements() <= NumElts) {
SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
Result.getOperand(0), Result.getOperand(1));
V = DAG.getBitcast(MVT::v4i64, V);
@@ -46840,14 +46907,18 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
return false;
+ APInt DemandedBits = APInt::getZero(EltSizeInBits);
APInt DemandedElts = APInt::getZero(NumElts);
for (int I = 0; I != NumElts; ++I)
- if (!EltBits[I].isZero())
+ if (!EltBits[I].isZero()) {
+ DemandedBits |= EltBits[I];
DemandedElts.setBit(I);
+ }
APInt KnownUndef, KnownZero;
return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
- KnownZero, DCI);
+ KnownZero, DCI) ||
+ TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI);
};
if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
if (N->getOpcode() != ISD::DELETED_NODE)
@@ -49031,8 +49102,13 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
return SDValue();
// SSSE3's pshufb results in less instructions in the cases below.
- if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)
- return SDValue();
+ if (Subtarget.hasSSSE3() && NumElems == 8) {
+ if (InSVT == MVT::i16)
+ return SDValue();
+ if (InSVT == MVT::i32 &&
+ (OutSVT == MVT::i8 || !Subtarget.hasSSE41() || Subtarget.hasInt256()))
+ return SDValue();
+ }
SDLoc DL(N);
// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
@@ -51110,6 +51186,30 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
+ // iff pow2splat(c1).
+ if (Src.getOpcode() == X86ISD::PCMPEQ &&
+ Src.getOperand(0).getOpcode() == ISD::AND &&
+ ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
+ SDValue LHS = Src.getOperand(0).getOperand(0);
+ SDValue RHS = Src.getOperand(0).getOperand(1);
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+ if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) {
+ SDLoc DL(N);
+ MVT ShiftVT = SrcVT;
+ if (ShiftVT.getScalarType() == MVT::i8) {
+ // vXi8 shifts - we only care about the signbit so can use PSLLW.
+ ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ LHS = DAG.getBitcast(ShiftVT, LHS);
+ }
+ unsigned ShiftAmt = KnownRHS.getConstant().countLeadingZeros();
+ LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS,
+ ShiftAmt, DAG);
+ LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT);
+ return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS);
+ }
+ }
+
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getAllOnes(NumBits));
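The new MOVMSK fold is easier to follow with one lane of concrete numbers: when c1 is a splat of a single power of two, testing and(x, c1) == 0 does not need a compare, because shifting that bit into the sign position and inverting produces the same per-lane mask bit. A scalar sketch (hypothetical helper, not from the patch):

#include <cstdint>

// Equivalent of one lane of movmsk(pcmpeq(and(x, 1 << k), 0)):
// shift bit k into the sign bit, invert, read the sign bit.
static bool laneBitClear(uint32_t x, unsigned k) {
  unsigned ShAmt = 31 - k;        // countLeadingZeros(1u << k) for 32 bits
  uint32_t Shifted = x << ShAmt;  // bit k now sits in the sign position
  return (~Shifted) >> 31;        // 1 iff bit k of x was 0
}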
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 7368b64efd9a..6206d8efb3d0 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -61,6 +61,8 @@
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <map>
+
using namespace llvm;
using namespace PatternMatch;
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 92acfb93057a..9c16d3750998 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -23,6 +23,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index ce3c5153bde2..e6a542385662 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
@@ -365,26 +366,25 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the argument list, transferring uses of the old arguments over to
// the new arguments, also transferring over the names as well.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin();
- I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Function::arg_iterator I2 = NF->arg_begin();
+ for (Argument &Arg : F->args()) {
+ if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
// If this is an unmodified argument, move the name and users over to the
// new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
+ Arg.replaceAllUsesWith(&*I2);
+ I2->takeName(&Arg);
++I2;
continue;
}
- if (ByValArgsToTransform.count(&*I)) {
+ if (ByValArgsToTransform.count(&Arg)) {
// In the callee, we create an alloca, and store each of the new incoming
// arguments into the alloca.
Instruction *InsertPt = &NF->begin()->front();
// Just add all the struct element types.
- Type *AgTy = I->getParamByValType();
- Align StructAlign = *I->getParamAlign();
+ Type *AgTy = Arg.getParamByValType();
+ Align StructAlign = *Arg.getParamAlign();
Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
StructAlign, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
@@ -397,41 +397,41 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Value *Idx = GetElementPtrInst::Create(
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
InsertPt);
- I2->setName(I->getName() + "." + Twine(i));
+ I2->setName(Arg.getName() + "." + Twine(i));
Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(i));
new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
}
// Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+ Arg.replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&Arg);
continue;
}
// There potentially are metadata uses for things like llvm.dbg.value.
// Replace them with undef, after handling the other regular uses.
auto RauwUndefMetadata = make_scope_exit(
- [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); });
+ [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
- if (I->use_empty())
+ if (Arg.use_empty())
continue;
// Otherwise, if we promoted this argument, then all users are load
// instructions (or GEPs with only load users), and all loads should be
// using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ ScalarizeTable &ArgIndices = ScalarizedElements[&Arg];
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ while (!Arg.use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Arg.user_back())) {
assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
- I2->setName(I->getName() + ".val");
+ I2->setName(Arg.getName() + ".val");
LI->replaceAllUsesWith(&*I2);
LI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName()
<< "' in function '" << F->getName() << "'\n");
} else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Arg.user_back());
assert(!GEP->use_empty() &&
"GEPs without uses should be cleaned up already");
IndicesVector Operands;
@@ -449,7 +449,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(It != ArgIndices.end() && "GEP not handled??");
}
- TheArg->setName(formatv("{0}.{1:$[.]}.val", I->getName(),
+ TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(),
make_range(Operands.begin(), Operands.end())));
LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
@@ -610,12 +610,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
return true;
};
- // First, iterate the entry block and mark loads of (geps of) arguments as
- // safe.
+ // First, iterate instructions that are guaranteed to execute on function
+ // entry and mark loads of (geps of) arguments as safe.
BasicBlock &EntryBlock = Arg->getParent()->front();
// Declare this here so we can reuse it
IndicesVector Indices;
- for (Instruction &I : EntryBlock)
+ for (Instruction &I : EntryBlock) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *V = LI->getPointerOperand();
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
@@ -649,6 +649,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
}
}
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
SmallVector<LoadInst *, 16> Loads;
@@ -830,7 +834,10 @@ static bool canPaddingBeAccessed(Argument *arg) {
return false;
}
-bool ArgumentPromotionPass::areFunctionArgsABICompatible(
+/// Check if callers and the callee \p F agree on how promoted arguments would
+/// be passed. Arguments they do not agree on are removed from the sets, but
+/// the return value has to be checked as well.
+static bool areFunctionArgsABICompatible(
const Function &F, const TargetTransformInfo &TTI,
SmallPtrSetImpl<Argument *> &ArgsToPromote,
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
@@ -1003,7 +1010,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
return nullptr;
- if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+ if (!areFunctionArgsABICompatible(
*F, TTI, ArgsToPromote, ByValArgsToTransform))
return nullptr;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 12b8a0ef9d00..d66140a726f6 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -183,6 +183,31 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
}
///}
+bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA) {
+ // We are looking for volatile instructions or non-relaxed atomics.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->hasFnAttr(Attribute::NoSync))
+ return true;
+
+ // Non-convergent and readnone imply nosync.
+ if (!CB->isConvergent() && !CB->mayReadOrWriteMemory())
+ return true;
+
+ if (AANoSync::isNoSyncIntrinsic(&I))
+ return true;
+
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
+ return NoSyncAA.isAssumedNoSync();
+ }
+
+ if (!I.mayReadOrWriteMemory())
+ return true;
+
+ return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I);
+}
+
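
The new AA::isNoSyncInst above is a decision ladder. A hedged stand-alone restatement of its call-site part (plain C++; the struct fields are hypothetical stand-ins for the corresponding IR queries):

    struct CallFacts {
      bool HasNoSyncAttr;     // Callee or call site carries `nosync`.
      bool IsNoSyncIntrinsic; // E.g. a non-volatile memory intrinsic.
      bool IsConvergent;
      bool TouchesMemory;     // mayReadOrWriteMemory()
    };

    bool callIsNoSync(const CallFacts &C) {
      if (C.HasNoSyncAttr || C.IsNoSyncIntrinsic)
        return true;
      // A call that is neither convergent nor touches memory has no way
      // to synchronize with other threads.
      return !C.IsConvergent && !C.TouchesMemory;
    }
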
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V) {
if (auto *C = dyn_cast<Constant>(&V))
@@ -370,6 +395,162 @@ bool AA::getPotentialCopiesOfStoredValue(
return true;
}
+static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA,
+ bool RequireReadNone, bool &IsKnown) {
+
+ IRPosition::Kind Kind = IRP.getPositionKind();
+ if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
+ const auto &MemLocAA =
+ A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemLocAA.isAssumedReadNone()) {
+ IsKnown = MemLocAA.isKnownReadNone();
+ if (!IsKnown)
+ A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+ }
+
+ const auto &MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
+ if (MemBehaviorAA.isAssumedReadNone() ||
+ (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
+ IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
+ : MemBehaviorAA.isKnownReadOnly();
+ if (!IsKnown)
+ A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
+ return true;
+ }
+
+ return false;
+}
+
+bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ false, IsKnown);
+}
+bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP,
+ const AbstractAttribute &QueryingAA, bool &IsKnown) {
+ return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
+ /* RequireReadNone */ true, IsKnown);
+}
+
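
Usage sketch for the two helpers above (a fragment, assuming an Attributor `A`, a position `IRP`, and a querying attribute `QueryingAA` are in scope, as in any updateImpl):

    bool IsKnown = false;
    if (AA::isAssumedReadOnly(A, IRP, QueryingAA, IsKnown)) {
      // Readonly holds at least optimistically. When IsKnown is false an
      // OPTIONAL dependence was recorded, so QueryingAA is re-run if the
      // assumption is later invalidated.
      if (IsKnown) {
        // A known fact: safe to act on immediately.
      }
    }
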
+static bool
+isPotentiallyReachable(Attributor &A, const Instruction &FromI,
+ const Instruction *ToI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName()
+ << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
+ << "]\n");
+
+ SmallPtrSet<const Instruction *, 8> Visited;
+ SmallVector<const Instruction *> Worklist;
+ Worklist.push_back(&FromI);
+
+ while (!Worklist.empty()) {
+ const Instruction *CurFromI = Worklist.pop_back_val();
+ if (!Visited.insert(CurFromI).second)
+ continue;
+
+ const Function *FromFn = CurFromI->getFunction();
+ if (FromFn == &ToFn) {
+ if (!ToI)
+ return true;
+ LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
+ << " intraprocedurally\n");
+ const auto &ReachabilityAA = A.getAAFor<AAReachability>(
+ QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
+ bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
+ << (Result ? "can potentially " : "cannot ") << "reach "
+ << *ToI << " [Intra]\n");
+ if (Result)
+ return true;
+ continue;
+ }
+
+ // TODO: If we can go arbitrarily backwards we will eventually reach an
+ // entry point that can reach ToI. Only once this takes a set of blocks
+ // through which we cannot go, or once we track internal functions not
+ // accessible from the outside, does it make sense to perform backwards
+ // analysis in the absence of a GoBackwardsCB.
+ if (!GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
+ << *CurFromI << " is not checked backwards, abort\n");
+ return true;
+ }
+
+ // Check if the current instruction is already known to reach the ToFn.
+ const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
+ QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
+ bool Result = FnReachabilityAA.instructionCanReach(
+ A, *CurFromI, ToFn, /* UseBackwards */ false);
+ LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
+ << " " << (Result ? "can potentially " : "cannot ")
+ << "reach @" << ToFn.getName() << " [FromFn]\n");
+ if (Result)
+ return true;
+
+ // If we do not go backwards from the FromFn we are done here and so far we
+ // could not find a way to reach ToFn/ToI.
+ if (!GoBackwardsCB(*FromFn))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @"
+ << FromFn->getName() << "\n");
+
+ auto CheckCallSite = [&](AbstractCallSite ACS) {
+ CallBase *CB = ACS.getInstruction();
+ if (!CB)
+ return false;
+
+ if (isa<InvokeInst>(CB))
+ return false;
+
+ Instruction *Inst = CB->getNextNonDebugInstruction();
+ Worklist.push_back(Inst);
+ return true;
+ };
+
+ bool AllCallSitesKnown;
+ Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
+ /* RequireAllCallSites */ true,
+ &QueryingAA, AllCallSitesKnown);
+ if (Result) {
+ LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " failed, give up\n");
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI
+ << " in @" << FromFn->getName()
+ << " worklist size is: " << Worklist.size() << "\n");
+ }
+ return false;
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Instruction &ToI,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from "
+ << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n");
+ const Function *ToFn = ToI.getFunction();
+ return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
+bool AA::isPotentiallyReachable(
+ Attributor &A, const Instruction &FromI, const Function &ToFn,
+ const AbstractAttribute &QueryingAA,
+ std::function<bool(const Function &F)> GoBackwardsCB) {
+ return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA,
+ GoBackwardsCB);
+}
+
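
Once the AA queries are abstracted away, the traversal above has a simple shape. A self-contained sketch (standard C++, not the Attributor API) of the forward walk that steps back to call sites when the callback allows it:

    #include <set>
    #include <vector>

    struct Node {
      int Fn;                              // Function this point lives in.
      std::vector<Node *> CallSiteResumes; // Points after each call site.
    };

    bool potentiallyReaches(Node *From, int ToFn, bool (*GoBackwards)(int)) {
      std::set<Node *> Visited;
      std::vector<Node *> Worklist{From};
      while (!Worklist.empty()) {
        Node *Cur = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(Cur).second)
          continue;
        if (Cur->Fn == ToFn)
          return true; // Same function: assume intraprocedural reachability.
        if (!GoBackwards)
          return true; // No backwards info: conservatively reachable.
        if (!GoBackwards(Cur->Fn))
          continue;    // Not allowed to leave this function.
        for (Node *Resume : Cur->CallSiteResumes)
          Worklist.push_back(Resume); // Continue after each call site.
      }
      return false;
    }
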
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
@@ -704,9 +885,8 @@ void IRPosition::verify() {
"Expected a nullptr for an invalid position!");
return;
case IRP_FLOAT:
- assert((!isa<CallBase>(&getAssociatedValue()) &&
- !isa<Argument>(&getAssociatedValue())) &&
- "Expected specialized kind for call base and argument values!");
+ assert((!isa<Argument>(&getAssociatedValue())) &&
+ "Expected specialized kind for argument values!");
return;
case IRP_RETURNED:
assert(isa<Function>(getAsValuePtr()) &&
@@ -900,7 +1080,7 @@ bool Attributor::isAssumedDead(const Use &U,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
- return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
+ return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
@@ -923,7 +1103,8 @@ bool Attributor::isAssumedDead(const Instruction &I,
// If we have a context instruction and a liveness AA we use it.
if (FnLivenessAA &&
FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
- FnLivenessAA->isAssumedDead(&I)) {
+ (CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent())
+ : FnLivenessAA->isAssumedDead(&I))) {
if (QueryingAA)
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
if (!FnLivenessAA->isKnownDead(&I))
@@ -934,8 +1115,9 @@ bool Attributor::isAssumedDead(const Instruction &I,
if (CheckBBLivenessOnly)
return false;
- const AAIsDead &IsDeadAA = getOrCreateAAFor<AAIsDead>(
- IRPosition::value(I, CBCtx), QueryingAA, DepClassTy::NONE);
+ const IRPosition IRP = IRPosition::inst(I, CBCtx);
+ const AAIsDead &IsDeadAA =
+ getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't check liveness for AAIsDead.
if (QueryingAA == &IsDeadAA)
return false;
@@ -1035,8 +1217,14 @@ bool Attributor::checkForAllUses(
const Use *U = Worklist.pop_back_val();
if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
- << *U->getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U->getUser()))
+ dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName()
+ << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
CheckBBLivenessOnly, LivenessDepClass)) {
@@ -1126,8 +1314,14 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
- LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
- << *U.getUser() << "\n");
+ LLVM_DEBUG({
+ if (auto *Fn = dyn_cast<Function>(U))
+ dbgs() << "[Attributor] Check use: " << Fn->getName() << " in "
+ << *U.getUser() << "\n";
+ else
+ dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
+ << "\n";
+ });
bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
@@ -1268,9 +1462,12 @@ static bool checkForAllInstructionsImpl(
for (Instruction *I : *Insts) {
// Skip dead instructions.
if (A && !CheckPotentiallyDead &&
- A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
- UsedAssumedInformation, CheckBBLivenessOnly))
+ A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA,
+ UsedAssumedInformation, CheckBBLivenessOnly)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I
+ << " is potentially dead, skip!\n";);
continue;
+ }
if (!Pred(*I))
return false;
@@ -1329,7 +1526,7 @@ bool Attributor::checkForAllReadWriteInstructions(
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
- if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA,
+ if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
UsedAssumedInformation))
continue;
@@ -1381,9 +1578,11 @@ void Attributor::runTillFixpoint() {
InvalidAA->Deps.pop_back();
AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+ LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA);
Worklist.insert(DepAA);
continue;
}
+ LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA);
DepAA->getState().indicatePessimisticFixpoint();
assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
if (!DepAA->getState().isValidState())
@@ -1433,6 +1632,9 @@ void Attributor::runTillFixpoint() {
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+ Worklist.insert(QueryAAsAwaitingUpdate.begin(),
+ QueryAAsAwaitingUpdate.end());
+ QueryAAsAwaitingUpdate.clear();
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
@@ -1492,6 +1694,12 @@ void Attributor::runTillFixpoint() {
}
}
+void Attributor::registerForUpdate(AbstractAttribute &AA) {
+ assert(AA.isQueryAA() &&
+ "Non-query AAs should not be required to register for updates!");
+ QueryAAsAwaitingUpdate.insert(&AA);
+}
+
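
registerForUpdate feeds into the fixpoint driver above: query AAs are re-queued wholesale each round, presumably because their dependences are discovered lazily rather than up front. A tiny model of that scheduling (standard C++, integers standing in for attributes):

    #include <set>

    struct Scheduler {
      std::set<int> Worklist, QueryAAsAwaitingUpdate;

      void registerForUpdate(int AA) { QueryAAsAwaitingUpdate.insert(AA); }

      // Refill for one fixpoint round: changed AAs are re-queued via
      // dependence tracking, query AAs re-queue themselves explicitly.
      void refill(const std::set<int> &ChangedAAs) {
        Worklist.clear();
        Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
        Worklist.insert(QueryAAsAwaitingUpdate.begin(),
                        QueryAAsAwaitingUpdate.end());
        QueryAAsAwaitingUpdate.clear();
      }
    };
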
ChangeStatus Attributor::manifestAttributes() {
TimeTraceScope TimeScope("Attributor::manifestAttributes");
size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
@@ -1792,7 +2000,7 @@ ChangeStatus Attributor::cleanupIR() {
// Actually we do not delete the blocks but squash them into a single
// unreachable but untangling branches that jump here is something we need
// to do in a more generic way.
- DetatchDeadBlocks(ToBeDeletedBBs, nullptr);
+ detachDeadBlocks(ToBeDeletedBBs, nullptr);
}
identifyDeadInternalFunctions();
@@ -1897,7 +2105,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
/* CheckBBLivenessOnly */ true))
CS = AA.update(*this);
- if (DV.empty()) {
+ if (!AA.isQueryAA() && DV.empty()) {
// If the attribute did not query any non-fix information, the state
// will not change and we can indicate that right away.
AAState.indicateOptimisticFixpoint();
@@ -2601,12 +2809,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
- IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBInstPos = IRPosition::inst(CB);
IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
- getOrCreateAAFor<AAIsDead>(CBRetPos);
+ getOrCreateAAFor<AAIsDead>(CBInstPos);
Function *Callee = CB.getCalledFunction();
// TODO: Even if the callee is not known now we might be able to simplify
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 76420783b2d1..2d88e329e093 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -68,6 +69,12 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
+static cl::opt<unsigned>
+ MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
+ cl::desc("Maximum number of interfering writes to "
+ "check before assuming all might interfere."),
+ cl::init(6));
+
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
@@ -244,6 +251,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
+/// If \p Intraprocedural is set to true only values valid in the scope of
+/// \p CtxI will be visited and simplification into other scopes is prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
@@ -251,7 +260,8 @@ static bool genericValueTraversal(
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
- function_ref<Value *(Value *)> StripCB = nullptr) {
+ function_ref<Value *(Value *)> StripCB = nullptr,
+ bool Intraprocedural = false) {
const AAIsDead *LivenessAA = nullptr;
if (IRP.getAnchorScope())
@@ -281,8 +291,11 @@ static bool genericValueTraversal(
continue;
// Make sure we limit the compile time for complex expressions.
- if (Iteration++ >= MaxValues)
+ if (Iteration++ >= MaxValues) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
+ << Iteration << "!\n");
return false;
+ }
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer as stripPointerCasts only works on them.
@@ -331,10 +344,7 @@ static bool genericValueTraversal(
"Expected liveness in the presence of instructions!");
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- bool UsedAssumedInformation = false;
- if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
- LivenessAA, UsedAssumedInformation,
- /* CheckBBLivenessOnly */ true)) {
+ if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
AnyDead = true;
continue;
}
@@ -344,24 +354,49 @@ static bool genericValueTraversal(
continue;
}
+ if (auto *Arg = dyn_cast<Argument>(V)) {
+ if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
+ SmallVector<Item> CallSiteValues;
+ bool AllCallSitesKnown = true;
+ if (A.checkForAllCallSites(
+ [&](AbstractCallSite ACS) {
+ // Callbacks might not have a corresponding call site operand;
+ // stick with the argument in that case.
+ Value *CSOp = ACS.getCallArgOperand(*Arg);
+ if (!CSOp)
+ return false;
+ CallSiteValues.push_back({CSOp, ACS.getInstruction()});
+ return true;
+ },
+ *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ Worklist.append(CallSiteValues);
+ continue;
+ }
+ }
+ }
+
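
The new Argument case above translates a value in the callee into the values passed at each call site. A hedged sketch of that translation in isolation (assumes the usual LLVM IR headers and `using namespace llvm`; direct calls only, no callback handling):

    static bool collectCallSiteValues(Argument &Arg,
                                      SmallVectorImpl<Value *> &Out) {
      Function &F = *Arg.getParent();
      for (const Use &U : F.uses()) {
        auto *CB = dyn_cast<CallBase>(U.getUser());
        if (!CB || !CB->isCallee(&U))
          return false; // Unknown or indirect use: keep the Argument itself.
        Out.push_back(CB->getArgOperand(Arg.getArgNo()));
      }
      return true;
    }
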
if (UseValueSimplify && !isa<Constant>(V)) {
bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV.hasValue())
continue;
- if (!SimpleV.getValue())
- return false;
Value *NewV = SimpleV.getValue();
- if (NewV != V) {
- Worklist.push_back({NewV, CtxI});
- continue;
+ if (NewV && NewV != V) {
+ if (!Intraprocedural || !CtxI ||
+ AA::isValidInScope(*NewV, CtxI->getFunction())) {
+ Worklist.push_back({NewV, CtxI});
+ continue;
+ }
}
}
// Once a leaf is reached we inform the user through the callback.
- if (!VisitValueCB(*V, CtxI, State, Iteration > 1))
+ if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
+ << *V << "!\n");
return false;
+ }
} while (!Worklist.empty());
// If we actually used liveness information we have to record a dependence.
@@ -375,7 +410,8 @@ static bool genericValueTraversal(
bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
- const Instruction *CtxI) {
+ const Instruction *CtxI,
+ bool Intraprocedural) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
@@ -387,7 +423,7 @@ bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB))
+ true, 32, StripCB, Intraprocedural))
return false;
return true;
}
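
Usage sketch for the extended interface above (a fragment; `A`, `QueryingAA`, a pointer value `Ptr`, and a context instruction `I` are assumed in scope). With the new flag set, only objects valid in the scope of the context instruction are reported:

    SmallVector<Value *, 8> Objects;
    if (AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
                                        /* Intraprocedural */ true)) {
      for (Value *Obj : Objects) {
        // Each Obj is an assumed underlying object of Ptr, and none of
        // them stems from a caller's scope.
      }
    }
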
@@ -620,7 +656,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
if (!AssociatedFunction)
return S.indicatePessimisticFixpoint();
- CallBase &CBContext = static_cast<CallBase &>(this->getAnchorValue());
+ CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
if (IntroduceCallBaseContext)
LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
<< CBContext << "\n");
@@ -1026,7 +1062,6 @@ private:
BooleanState BS;
};
-namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1058,6 +1093,165 @@ struct AAPointerInfoImpl
const override {
return State::forallInterferingAccesses(SI, CB);
}
+ bool forallInterferingWrites(
+ Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
+ function_ref<bool(const Access &, bool)> UserCB) const override {
+ SmallPtrSet<const Access *, 8> DominatingWrites;
+ SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
+
+ Function &Scope = *LI.getFunction();
+ const auto &NoSyncAA = A.getAAFor<AANoSync>(
+ QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+ const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
+ const bool NoSync = NoSyncAA.isAssumedNoSync();
+
+ // Helper to determine if we need to consider threading, which we cannot
+ // handle right now. However, if the function is (assumed) nosync or all
+ // instructions are executed by the initial thread only, we can ignore
+ // threading.
+ auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
+ if (NoSync)
+ return true;
+ if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ return true;
+ return false;
+ };
+
+ // Helper to determine if the access is executed by the same thread as the
+ // load; for now it is sufficient to avoid any potential threading effects
+ // as we cannot deal with them anyway.
+ auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
+ return CanIgnoreThreading(*Acc.getLocalInst());
+ };
+
+ // TODO: Use inter-procedural reachability and dominance.
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ QueryingAA, IRPosition::function(*LI.getFunction()),
+ DepClassTy::OPTIONAL);
+
+ const bool CanUseCFGReasoning = CanIgnoreThreading(LI);
+ InformationCache &InfoCache = A.getInfoCache();
+ const DominatorTree *DT =
+ NoRecurseAA.isKnownNoRecurse()
+ ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+ Scope)
+ : nullptr;
+
+ enum GPUAddressSpace : unsigned {
+ Generic = 0,
+ Global = 1,
+ Shared = 3,
+ Constant = 4,
+ Local = 5,
+ };
+
+ // Helper to check if a value has "kernel lifetime", that is, it will not
+ // outlive a GPU kernel. This is true for shared, constant, and local
+ // globals on AMD and NVIDIA GPUs.
+ auto HasKernelLifetime = [&](Value *V, Module &M) {
+ Triple T(M.getTargetTriple());
+ if (!(T.isAMDGPU() || T.isNVPTX()))
+ return false;
+ switch (V->getType()->getPointerAddressSpace()) {
+ case GPUAddressSpace::Shared:
+ case GPUAddressSpace::Constant:
+ case GPUAddressSpace::Local:
+ return true;
+ default:
+ return false;
+ };
+ };
+
+ // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query
+ // to determine if we should look at reachability from the callee. For
+ // certain pointers we know the lifetime and we do not have to step into the
+ // callee to determine reachability as the pointer would be dead in the
+ // callee. See the conditional initialization below.
+ std::function<bool(const Function &)> IsLiveInCalleeCB;
+
+ if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) {
+ // If the alloca containing function is not recursive the alloca
+ // must be dead in the callee.
+ const Function *AIFn = AI->getFunction();
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
+ if (NoRecurseAA.isAssumedNoRecurse()) {
+ IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
+ }
+ } else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
+ // If the global has kernel lifetime we can stop if we reach a kernel
+ // as it is "dead" in the (unknown) callees.
+ if (HasKernelLifetime(GV, *GV->getParent()))
+ IsLiveInCalleeCB = [](const Function &Fn) {
+ return !Fn.hasFnAttribute("kernel");
+ };
+ }
+
+ auto AccessCB = [&](const Access &Acc, bool Exact) {
+ if (!Acc.isWrite())
+ return true;
+
+ // For now we only filter accesses based on CFG reasoning, which does not
+ // work yet if we have threading effects, or the access is complicated.
+ if (CanUseCFGReasoning) {
+ if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
+ IsLiveInCalleeCB))
+ return true;
+ if (DT && Exact &&
+ (Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
+ IsSameThreadAsLoad(Acc)) {
+ if (DT->dominates(Acc.getLocalInst(), &LI))
+ DominatingWrites.insert(&Acc);
+ }
+ }
+
+ InterferingWrites.push_back({&Acc, Exact});
+ return true;
+ };
+ if (!State::forallInterferingAccesses(LI, AccessCB))
+ return false;
+
+ // If we cannot use CFG reasoning, only the non-write accesses have been
+ // filtered out and we are done here.
+ if (!CanUseCFGReasoning) {
+ for (auto &It : InterferingWrites)
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
+
+ // Helper to determine if we can skip a specific write access. This is in
+ // the worst case quadratic as we are looking for another write that will
+ // hide the effect of this one.
+ auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if (!IsSameThreadAsLoad(Acc))
+ return false;
+ if (!DominatingWrites.count(&Acc))
+ return false;
+ for (const Access *DomAcc : DominatingWrites) {
+ assert(Acc.getLocalInst()->getFunction() ==
+ DomAcc->getLocalInst()->getFunction() &&
+ "Expected dominating writes to be in the same function!");
+
+ if (DomAcc != &Acc &&
+ DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Run the user callback on all writes we cannot skip and return if that
+ // succeeded for all or not.
+ unsigned NumInterferingWrites = InterferingWrites.size();
+ for (auto &It : InterferingWrites)
+ if (!DT || NumInterferingWrites > MaxInterferingWrites ||
+ !CanSkipAccess(*It.first, It.second))
+ if (!UserCB(*It.first, It.second))
+ return false;
+ return true;
+ }
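
The skipping logic above hinges on one observation: a dominating write W is invisible to the load if another write that also dominates the load is itself dominated by W, i.e. it executes between W and the load and overwrites W's effect. A self-contained sketch of just that filter (standard C++; `Dominates` stands in for DominatorTree::dominates):

    #include <set>

    struct Write { int Id; };

    bool canSkip(const Write *W, const std::set<const Write *> &DomWrites,
                 bool (*Dominates)(const Write *, const Write *)) {
      if (!DomWrites.count(W))
        return false; // Only writes dominating the load can be shadowed.
      for (const Write *D : DomWrites)
        if (D != W && Dominates(W, D))
          return true; // D overwrites W's effect before the load executes.
      return false;
    }
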
ChangeStatus translateAndAddCalleeState(Attributor &A,
const AAPointerInfo &CalleeAA,
@@ -1200,9 +1394,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
<< " : " << *Idx << "\n");
return false;
}
- UsrOI.Offset = PtrOI.Offset +
- DL.getIndexedOffsetInType(
- GEP->getSourceElementType(), Indices);
+ UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType(
+ GEP->getSourceElementType(), Indices);
Follow = true;
return true;
}
@@ -1693,17 +1886,9 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
bool) -> bool {
- bool UsedAssumedInformation = false;
- Optional<Value *> SimpleRetVal =
- A.getAssumedSimplified(V, *this, UsedAssumedInformation);
- if (!SimpleRetVal.hasValue())
- return true;
- if (!SimpleRetVal.getValue())
- return false;
- Value *RetVal = *SimpleRetVal;
- assert(AA::isValidInScope(*RetVal, Ret.getFunction()) &&
+ assert(AA::isValidInScope(V, Ret.getFunction()) &&
"Assumed returned value should be valid in function scope!");
- if (ReturnedValues[RetVal].insert(&Ret))
+ if (ReturnedValues[&V].insert(&Ret))
Changed = ChangeStatus::CHANGED;
return true;
};
@@ -1712,7 +1897,8 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I);
+ &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
+ /* StripCB */ nullptr, /* Intraprocedural */ true);
};
// Discover returned values from all live returned instructions in the
@@ -1767,24 +1953,16 @@ struct AANoSyncImpl : AANoSync {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
-
- /// Helper function used to determine whether an instruction is non-relaxed
- /// atomic. In other words, if an atomic instruction does not have unordered
- /// or monotonic ordering
- static bool isNonRelaxedAtomic(Instruction *I);
-
- /// Helper function specific for intrinsics which are potentially volatile
- static bool isNoSyncIntrinsic(Instruction *I);
};
-bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
+bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
if (auto *FI = dyn_cast<FenceInst>(I))
// All legal orderings for fence are stronger than monotonic.
return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
// Unordered is not a legal ordering for cmpxchg.
return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic ||
AI->getFailureOrdering() != AtomicOrdering::Monotonic);
@@ -1813,7 +1991,7 @@ bool AANoSyncImpl::isNonRelaxedAtomic(Instruction *I) {
/// Return true if this intrinsic is nosync. This is only used for intrinsics
/// which would be nosync except that they have a volatile flag. All other
/// intrinsics are simply annotated with the nosync attribute in Intrinsics.td.
-bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
+bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
if (auto *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
@@ -1822,24 +2000,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) {
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
auto CheckRWInstForNoSync = [&](Instruction &I) {
- /// We are looking for volatile instructions or Non-Relaxed atomics.
-
- if (const auto *CB = dyn_cast<CallBase>(&I)) {
- if (CB->hasFnAttr(Attribute::NoSync))
- return true;
-
- if (isNoSyncIntrinsic(&I))
- return true;
-
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
- return NoSyncAA.isAssumedNoSync();
- }
-
- if (!I.isVolatile() && !isNonRelaxedAtomic(&I))
- return true;
-
- return false;
+ return AA::isNoSyncInst(A, I, *this);
};
auto CheckForNoSync = [&](Instruction &I) {
@@ -2327,16 +2488,6 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- // TODO: We should build a call graph ourselves to enable this in the module
- // pass as well.
- if (const Function *F = getAnchorScope())
- if (A.getInfoCache().getSccSize(*F) != 1)
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
@@ -2359,27 +2510,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
return ChangeStatus::UNCHANGED;
}
- // If the above check does not hold anymore we look at the calls.
- auto CheckForNoRecurse = [&](Instruction &I) {
- const auto &CB = cast<CallBase>(I);
- if (CB.hasFnAttr(Attribute::NoRecurse))
- return true;
-
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- if (!NoRecurseAA.isAssumedNoRecurse())
- return false;
-
- // Recursion to the same function
- if (CB.getCalledFunction() == getAnchorScope())
- return false;
-
- return true;
- };
-
- bool UsedAssumedInformation = false;
- if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this,
- UsedAssumedInformation))
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(*this, getIRPosition(),
+ DepClassTy::REQUIRED);
+ if (EdgeReachability.canReach(A, *getAnchorScope()))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2798,16 +2932,10 @@ struct AAWillReturnImpl : public AAWillReturn {
(!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
return false;
- const auto &MemAA =
- A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
- if (!MemAA.isAssumedReadOnly())
- return false;
- if (KnownOnly && !MemAA.isKnownReadOnly())
- return false;
- if (!MemAA.isKnownReadOnly())
- A.recordDependence(MemAA, *this, DepClassTy::OPTIONAL);
-
- return true;
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
+ return IsKnown || !KnownOnly;
+ return false;
}
/// See AbstractAttribute::updateImpl(...).
@@ -2904,6 +3032,10 @@ struct AAReachabilityImpl : AAReachability {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
+ *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
+ if (!NoRecurseAA.isAssumedNoRecurse())
+ return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
@@ -3008,9 +3140,8 @@ struct AANoAliasArgument final
return Base::updateImpl(A);
// If the argument is read-only, no-alias cannot break synchronization.
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
- *this, getIRPosition(), DepClassTy::OPTIONAL);
- if (MemBehaviorAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return Base::updateImpl(A);
// If the argument is never passed through callbacks, no-alias cannot break
@@ -3366,14 +3497,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
if (!NoUnwindAA.isKnownNoUnwind())
A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
- const auto &MemBehaviorAA =
- A.getAndUpdateAAFor<AAMemoryBehavior>(*this, CallIRP, DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadOnly()) {
- if (!MemBehaviorAA.isKnownReadOnly())
- A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
- return true;
- }
- return false;
+ bool IsKnown;
+ return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
}
};
@@ -3699,6 +3824,7 @@ struct AAIsDeadFunction : public AAIsDead {
if (!AssumedLiveBlocks.count(&BB)) {
A.deleteAfterManifest(BB);
++BUILD_STAT_NAME(AAIsDead, BasicBlock);
+ HasChanged = ChangeStatus::CHANGED;
}
return HasChanged;
@@ -3708,7 +3834,7 @@ struct AAIsDeadFunction : public AAIsDead {
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
- return !AssumedLiveEdges.count(std::make_pair(From, To));
+ return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
/// See AbstractAttribute::trackStatistics()
@@ -4921,14 +5047,11 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
AANoCapture::StateType T;
// Readonly means we cannot capture through memory.
- const auto &FnMemAA =
- A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::NONE);
- if (FnMemAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) {
T.addKnownBits(NOT_CAPTURED_IN_MEM);
- if (FnMemAA.isKnownReadOnly())
+ if (IsKnown)
addKnownBits(NOT_CAPTURED_IN_MEM);
- else
- A.recordDependence(FnMemAA, *this, DepClassTy::OPTIONAL);
}
// Make sure all returned values are different than the underlying value.
@@ -5085,7 +5208,6 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
-} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5106,7 +5228,6 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
-namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -5266,8 +5387,6 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
- if (!Acc.isWrite())
- return true;
if (Acc.isWrittenValueYetUndetermined())
return true;
Value *Content = Acc.getWrittenValue();
@@ -5287,7 +5406,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
DepClassTy::REQUIRED);
- if (!PI.forallInterferingAccesses(L, CheckAccess))
+ if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
return false;
}
return true;
@@ -5325,9 +5444,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
if (Arg->hasByValAttr()) {
// TODO: We probably need to verify synchronization is not an issue, e.g.,
// there is no race by not copying a constant byval.
- const auto &MemAA = A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(),
- DepClassTy::REQUIRED);
- if (!MemAA.isAssumedReadOnly())
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return indicatePessimisticFixpoint();
}
@@ -6827,9 +6945,8 @@ struct AAPrivatizablePtrCallSiteArgument final
return indicatePessimisticFixpoint();
}
- const auto &MemBehaviorAA =
- A.getAAFor<AAMemoryBehavior>(*this, IRP, DepClassTy::REQUIRED);
- if (!MemBehaviorAA.isAssumedReadOnly()) {
+ bool IsKnown;
+ if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
return indicatePessimisticFixpoint();
}
@@ -7378,7 +7495,6 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
-} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7412,7 +7528,6 @@ std::string AAMemoryLocation::getMemoryLocationsAsStr(
return S;
}
-namespace {
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
@@ -7657,7 +7772,8 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
+ /* Intraprocedural */ true)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -9411,7 +9527,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
}
};
- CallBase *CB = static_cast<CallBase *>(getCtxI());
+ CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
setHasUnknownCallee(false, Change);
@@ -9450,7 +9566,7 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
+ CallBase &CB = cast<CallBase>(Inst);
auto &CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9481,11 +9597,39 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
struct AAFunctionReachabilityFunction : public AAFunctionReachability {
private:
struct QuerySet {
- void markReachable(Function *Fn) {
- Reachable.insert(Fn);
- Unreachable.erase(Fn);
+ void markReachable(const Function &Fn) {
+ Reachable.insert(&Fn);
+ Unreachable.erase(&Fn);
}
+ /// If there is no information about the function None is returned.
+ Optional<bool> isCachedReachable(const Function &Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
+
+ if (Reachable.count(&Fn))
+ return true;
+
+ if (Unreachable.count(&Fn))
+ return false;
+
+ return llvm::None;
+ }
+
+ /// Set of functions that we know for sure are reachable.
+ DenseSet<const Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<const Function *> Unreachable;
+
+ /// If we can reach a function with a call to an unknown function we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+
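
The cache above is deliberately three-valued: reachable is a fact, unreachable is an optimistic assumption that may be revoked, and anything else has simply not been queried yet. A plain C++ model of the same idea (std::optional in place of llvm::Optional):

    #include <optional>
    #include <set>

    struct ReachCache {
      std::set<int> Reachable;   // Known facts.
      std::set<int> Unreachable; // Optimistic assumptions, may be revoked.
      bool CanReachUnknownCallee = false;

      std::optional<bool> isCachedReachable(int Fn) const {
        if (CanReachUnknownCallee)
          return true;           // Pessimistic catch-all.
        if (Reachable.count(Fn))
          return true;
        if (Unreachable.count(Fn))
          return false;
        return std::nullopt;     // Never queried before.
      }
    };
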
+ struct QueryResolver : public QuerySet {
ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
@@ -9499,31 +9643,30 @@ private:
}
}
- for (Function *Fn : make_early_inc_range(Unreachable)) {
- if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ for (const Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, *Fn)) {
Change = ChangeStatus::CHANGED;
- markReachable(Fn);
+ markReachable(*Fn);
}
}
return Change;
}
- bool isReachable(Attributor &A, const AAFunctionReachability &AA,
- ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
- // Assume that we can reach the function.
- // TODO: Be more specific with the unknown callee.
- if (CanReachUnknownCallee)
- return true;
-
- if (Reachable.count(Fn))
- return true;
+ bool isReachable(Attributor &A, AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ const Function &Fn) {
+ Optional<bool> Cached = isCachedReachable(Fn);
+ if (Cached.hasValue())
+ return Cached.getValue();
- if (Unreachable.count(Fn))
- return false;
+ // The query was not cached, thus it is new. We need to request an update
+ // explicitly to make sure the information is properly run to a
+ // fixpoint.
+ A.registerForUpdate(AA);
// We need to assume that this function can't reach Fn to prevent
// an infinite loop if this function is recursive.
- Unreachable.insert(Fn);
+ Unreachable.insert(&Fn);
bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
if (Result)
@@ -9533,13 +9676,13 @@ private:
bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList,
- Function *Fn) const {
+ const Function &Fn) const {
// Handle the most trivial case first.
for (auto *AAEdges : AAEdgesList) {
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- if (Edges.count(Fn))
+ if (Edges.count(const_cast<Function *>(&Fn)))
return true;
}
@@ -9560,28 +9703,44 @@ private:
}
// The result is false for now, set dependencies and leave.
- for (auto Dep : Deps)
- A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
+ for (auto *Dep : Deps)
+ A.recordDependence(*Dep, AA, DepClassTy::REQUIRED);
return false;
}
+ };
- /// Set of functions that we know for sure is reachable.
- DenseSet<Function *> Reachable;
+ /// Get call edges that can be reached by this instruction.
+ bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability,
+ const Instruction &Inst,
+ SmallVector<const AACallEdges *> &Result) const {
+ // Determine call-like instructions that we can reach from the inst.
+ auto CheckCallBase = [&](Instruction &CBInst) {
+ if (!Reachability.isAssumedReachable(A, Inst, CBInst))
+ return true;
- /// Set of functions that are unreachable, but might become reachable.
- DenseSet<Function *> Unreachable;
+ auto &CB = cast<CallBase>(CBInst);
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
- };
+ Result.push_back(&AAEdges);
+ return true;
+ };
+
+ bool UsedAssumedInformation = false;
+ return A.checkForAllCallLikeInstructions(CheckCallBase, *this,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
+ }
public:
AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
: AAFunctionReachability(IRP, A) {}
- bool canReach(Attributor &A, Function *Fn) const override {
+ bool canReach(Attributor &A, const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
@@ -9590,14 +9749,18 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- bool Result =
- NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis,
+ {&AAEdges}, Fn);
return Result;
}
/// Can \p CB reach \p Fn
- bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ bool canReach(Attributor &A, CallBase &CB,
+ const Function &Fn) const override {
+ if (!isValidState())
+ return true;
+
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
@@ -9606,13 +9769,40 @@ public:
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
- QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
+ QueryResolver &CBQuery = NonConstThis->CBQueries[&CB];
- bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
+ bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn);
return Result;
}
+ bool instructionCanReach(Attributor &A, const Instruction &Inst,
+ const Function &Fn,
+ bool UseBackwards) const override {
+ if (!isValidState())
+ return true;
+
+ if (UseBackwards)
+ return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
+
+ const auto &Reachability = A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
+ if (!AllKnown)
+ InstQSet.CanReachUnknownCallee = true;
+
+ return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const AACallEdges &AAEdges =
@@ -9621,7 +9811,7 @@ public:
Change |= WholeFunction.update(A, *this, {&AAEdges});
- for (auto CBPair : CBQueries) {
+ for (auto &CBPair : CBQueries) {
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(*CBPair.first),
DepClassTy::REQUIRED);
@@ -9629,6 +9819,25 @@ public:
Change |= CBPair.second.update(A, *this, {&AAEdges});
}
+ // Update the Instruction queries.
+ const AAReachability *Reachability;
+ if (!InstQueries.empty()) {
+ Reachability = &A.getAAFor<AAReachability>(
+ *this, IRPosition::function(*getAssociatedFunction()),
+ DepClassTy::REQUIRED);
+ }
+
+ // Check for local callbases first.
+ for (auto &InstPair : InstQueries) {
+ SmallVector<const AACallEdges *> CallEdges;
+ bool AllKnown =
+ getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
+ // Update will return Changed if this affects any queries.
+ if (!AllKnown)
+ InstPair.second.CanReachUnknownCallee = true;
+ Change |= InstPair.second.update(A, *this, CallEdges);
+ }
+
return Change;
}
@@ -9649,11 +9858,14 @@ private:
}
/// Used to answer if the whole function can reach a specific function.
- QuerySet WholeFunction;
+ QueryResolver WholeFunction;
/// Used to answer if a call base inside this function can reach a specific
/// function.
- DenseMap<CallBase *, QuerySet> CBQueries;
+ DenseMap<const CallBase *, QueryResolver> CBQueries;
+
+ /// This is for instruction queries that scan "forward".
+ DenseMap<const Instruction *, QueryResolver> InstQueries;
};
/// ---------------------- Assumption Propagation ------------------------------
@@ -9790,8 +10002,6 @@ private:
}
};
-} // namespace
-
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
&A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 74f11fa30959..927dceec8865 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d3cac3efce86..1cb32e32c895 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,14 +352,10 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
User *V = U->getUser();
- if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) {
- AppendUses(V);
- continue;
- }
- if (auto *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllConstantIndices())
- return false;
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
+ (GEP && GEP->hasAllConstantIndices())) {
AppendUses(V);
continue;
}
@@ -2229,6 +2225,13 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
+ // Return whether GV is explicitly or implicitly dso_local and not replaceable
+ // by another definition in the current linkage unit.
+ auto IsModuleLocal = [](GlobalValue &GV) {
+ return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
+ (GV.isDSOLocal() || GV.isImplicitDSOLocal());
+ };
+
for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
@@ -2240,18 +2243,20 @@ OptimizeGlobalAliases(Module &M,
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J.isInterposable())
+ if (!IsModuleLocal(J))
continue;
Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
- // if the aliasee is interposable because aliases point to the local
- // definition.
+ // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
+ // alias can be used to access the definition as if preemption did not
+ // happen.
// TODO: Try to handle non-zero GEPs of local aliasees.
- if (!Target || Target->isInterposable())
+ if (!Target || !IsModuleLocal(*Target))
continue;
+
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index e064fbbef595..faf7cb7d566a 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches;
// A command flag to be used for debugging to exclude indirect calls from
// similarity matching and outlining.
extern cl::opt<bool> DisableIndirectCalls;
+
+// A command flag to be used for debugging to exclude intrinsics from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableIntrinsics;
+
} // namespace llvm
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
@@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
InstructionClassifier.EnableBranches = !DisableBranches;
InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
+ InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
+
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index c0bb19e184d6..8e83d7bcb6c2 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 68f33410c602..2d765fb6ce6d 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,19 +26,25 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptBarrierElimination(
+ "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that eliminate barriers."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> PrintModuleAfterOptimizations(
"openmp-opt-print-module", cl::ZeroOrMore,
cl::desc("Print the current module after OpenMP optimizations."),
@@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged,
"Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
"Amount of memory pushed to shared memory");
+STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
@@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache {
RTLFunctions.insert(F); \
if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
RuntimeFunctionIDMap[F] = _Enum; \
- F->removeFnAttr(Attribute::NoInline); \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
@@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache {
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+ // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
+ // functions, except if `optnone` is present.
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
+
// TODO: We should attach the attributes defined in OMPKinds.def.
}
@@ -787,6 +807,8 @@ struct OpenMPOpt {
if (remarksEnabled())
analysisGlobalization();
+
+ Changed |= eliminateBarriers();
} else {
if (PrintICVValues)
printICVs();
@@ -809,6 +831,8 @@ struct OpenMPOpt {
Changed = true;
}
}
+
+ Changed |= eliminateBarriers();
}
return Changed;
@@ -1378,6 +1402,213 @@ private:
return Changed;
}
+ /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
+ /// TODO: Make this an AA and expand it to work across blocks and functions.
+ bool eliminateBarriers() {
+ bool Changed = false;
+
+ if (DisableOpenMPOptBarrierElimination)
+ return /*Changed=*/false;
+
+ if (OMPInfoCache.Kernels.empty())
+ return /*Changed=*/false;
+
+ enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
+
+ class BarrierInfo {
+ Instruction *I;
+ enum ImplicitBarrierType Type;
+
+ public:
+ BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
+ BarrierInfo(Instruction &I) : I(&I) {}
+
+ bool isImplicit() { return !I; }
+
+ bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
+
+ bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
+
+ Instruction *getInstruction() { return I; }
+ };
+
+ for (Function *Kernel : OMPInfoCache.Kernels) {
+ for (BasicBlock &BB : *Kernel) {
+ SmallVector<BarrierInfo, 8> BarriersInBlock;
+ SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
+
+ // Add the kernel entry implicit barrier.
+ if (&Kernel->getEntryBlock() == &BB)
+ BarriersInBlock.push_back(IBT_ENTRY);
+
+ // Find implicit and explicit aligned barriers in the same basic block.
+ for (Instruction &I : BB) {
+ if (isa<ReturnInst>(I)) {
+ // Add the implicit barrier when exiting the kernel.
+ BarriersInBlock.push_back(IBT_EXIT);
+ continue;
+ }
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ auto IsAlignBarrierCB = [&](CallBase &CB) {
+ switch (CB.getIntrinsicID()) {
+ case Intrinsic::nvvm_barrier0:
+ case Intrinsic::nvvm_barrier0_and:
+ case Intrinsic::nvvm_barrier0_or:
+ case Intrinsic::nvvm_barrier0_popc:
+ case Intrinsic::amdgcn_s_barrier:
+ return true;
+ default:
+ break;
+ }
+ return hasAssumption(CB,
+ KnownAssumptionString("ompx_aligned_barrier"));
+ };
+
+ if (IsAlignBarrierCB(*CB)) {
+ // Add an explicit aligned barrier.
+ BarriersInBlock.push_back(I);
+ }
+ }
+
+ if (BarriersInBlock.size() <= 1)
+ continue;
+
+ // A barrier in a barrier pair is removable if all instructions
+ // between the barriers in the pair are side-effect free modulo the
+ // barrier operation.
+ auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
+ BarrierInfo *EndBI) {
+ assert(
+ !StartBI->isImplicitExit() &&
+ "Expected start barrier to be other than a kernel exit barrier");
+ assert(
+ !EndBI->isImplicitEntry() &&
+ "Expected end barrier to be other than a kernel entry barrier");
+ // If StartBI's instruction is null then this is the implicit kernel
+ // entry barrier, so iterate from the first instruction in the entry
+ // block.
+ Instruction *I = (StartBI->isImplicitEntry())
+ ? &Kernel->getEntryBlock().front()
+ : StartBI->getInstruction()->getNextNode();
+ assert(I && "Expected non-null start instruction");
+ Instruction *E = (EndBI->isImplicitExit())
+ ? I->getParent()->getTerminator()
+ : EndBI->getInstruction();
+ assert(E && "Expected non-null end instruction");
+
+ for (; I != E; I = I->getNextNode()) {
+ if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
+ continue;
+
+ auto IsPotentiallyAffectedByBarrier =
+ [](Optional<MemoryLocation> Loc) {
+ const Value *Obj = (Loc && Loc->Ptr)
+ ? getUnderlyingObject(Loc->Ptr)
+ : nullptr;
+ if (!Obj) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Access to unknown location requires barriers\n");
+ return true;
+ }
+ if (isa<UndefValue>(Obj))
+ return false;
+ if (isa<AllocaInst>(Obj))
+ return false;
+ if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
+ if (GV->isConstant())
+ return false;
+ if (GV->isThreadLocal())
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Local)
+ return false;
+ if (GV->getAddressSpace() == (int)AddressSpace::Constant)
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Access to '" << *Obj
+ << "' requires barriers\n");
+ return true;
+ };
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ Optional<MemoryLocation> Loc =
+ MemoryLocation::getForSource(MTI);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+ continue;
+ }
+
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+ continue;
+
+ Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
+ if (IsPotentiallyAffectedByBarrier(Loc))
+ return false;
+ }
+
+ return true;
+ };
+
+ // Iterate barrier pairs and remove an explicit barrier if analysis
+ // deems it removable.
+ for (auto *It = BarriersInBlock.begin(),
+ *End = BarriersInBlock.end() - 1;
+ It != End; ++It) {
+
+ BarrierInfo *StartBI = It;
+ BarrierInfo *EndBI = (It + 1);
+
+ // Cannot remove when both are implicit barriers; continue.
+ if (StartBI->isImplicit() && EndBI->isImplicit())
+ continue;
+
+ if (!IsBarrierRemoveable(StartBI, EndBI))
+ continue;
+
+ assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
+ "Expected at least one explicit barrier to remove.");
+
+ // Remove an explicit barrier; check the first, then the second.
+ if (!StartBI->isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Remove start barrier "
+ << *StartBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(StartBI->getInstruction());
+ } else {
+ LLVM_DEBUG(dbgs() << "Remove end barrier "
+ << *EndBI->getInstruction() << "\n");
+ BarriersToBeDeleted.insert(EndBI->getInstruction());
+ }
+ }
+
+ if (BarriersToBeDeleted.empty())
+ continue;
+
+ Changed = true;
+ for (Instruction *I : BarriersToBeDeleted) {
+ ++NumBarriersEliminated;
+ auto Remark = [&](OptimizationRemark OR) {
+ return OR << "Redundant barrier eliminated.";
+ };
+
+ if (EnableVerboseRemarks)
+ emitRemark<OptimizationRemark>(I, "OMP190", Remark);
+ I->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+ }
+
void analysisGlobalization() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
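The new eliminateBarriers() removes one barrier of an adjacent pair when nothing between them touches shared state. A source-level sketch of the shape it targets, assuming the device runtime lowers these pragmas to the aligned barrier calls the pass recognizes:

  #include <omp.h>

  void kernel(int *Out, int N) {
  #pragma omp target teams map(tofrom : Out[0 : N])
  #pragma omp parallel
    {
      int Priv = omp_get_thread_num(); // thread-private, no shared access
  #pragma omp barrier                  // first aligned barrier
      Priv += 1;                       // still only thread-private work, so ...
  #pragma omp barrier                  // ... one of these two barriers is redundant
      if (Priv < N)
        Out[Priv] = Priv;
    }
  }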
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 21395460bccb..e104ae00e916 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index daaf6cbeb3fd..52708ff2f226 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -535,7 +535,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// the information that is needed by thin link will be written in the
// given OS.
if (ThinLinkOS && Index)
- WriteThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
+ writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
}
class WriteThinLTOBitcode : public ModulePass {
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6acace1d9fd4..8b30f0e989a1 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -970,7 +970,7 @@ bool DevirtModule::runForTesting(
if (StringRef(ClWriteSummary).endswith(".bc")) {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
ExitOnErr(errorCodeToError(EC));
- WriteIndexToFile(*Summary, OS);
+ writeIndexToFile(*Summary, OS);
} else {
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
ExitOnErr(errorCodeToError(EC));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1fb46af46bee..05b28328afbf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2468,10 +2468,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
- // Remove identical consecutive fences.
- Instruction *Next = FI.getNextNonDebugInstruction();
- if (auto *NFI = dyn_cast<FenceInst>(Next))
- if (FI.isIdenticalTo(NFI))
+ auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
+ // This check is solely here to handle arbitrary target-dependent syncscopes.
+ // TODO: Can be removed if this does not matter in practice.
+ if (NFI && FI.isIdenticalTo(NFI))
+ return eraseInstFromFunction(FI);
+
+ // Returns true if FI1 is identical to, or a stronger fence than, FI2.
+ auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
+ auto FI1SyncScope = FI1->getSyncScopeID();
+ // Consider same scope, where scope is global or single-thread.
+ if (FI1SyncScope != FI2->getSyncScopeID() ||
+ (FI1SyncScope != SyncScope::System &&
+ FI1SyncScope != SyncScope::SingleThread))
+ return false;
+
+ return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
+ };
+ if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
+ return eraseInstFromFunction(FI);
+
+ if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
+ if (isIdenticalOrStrongerFence(PFI, &FI))
return eraseInstFromFunction(FI);
return nullptr;
}
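A minimal sketch of what the strengthened fence fold permits, using standard C++ fences (their default scope and orderings map onto the SyncScope and isAtLeastOrStrongerThan checks above):

  #include <atomic>

  void publish(std::atomic<int> &Flag) {
    std::atomic_thread_fence(std::memory_order_acquire); // weaker fence ...
    std::atomic_thread_fence(std::memory_order_seq_cst); // ... subsumed by this one
    Flag.store(1, std::memory_order_relaxed);
  }

Since the seq_cst fence is at least as strong as the adjacent acquire fence in the same scope, InstCombine may now erase the acquire fence.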
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fd58a44504b3..e45be5745fcc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5882,6 +5882,55 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
return nullptr;
}
+/// This function folds patterns produced by lowering of reduce idioms, such
+/// as llvm.vector.reduce.and, which are lowered into instruction chains. This
+/// code attempts to generate fewer scalar comparisons in place of vector
+/// comparisons when possible.
+static Instruction *foldReductionIdiom(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
+ if (I.getType()->isVectorTy())
+ return nullptr;
+ ICmpInst::Predicate OuterPred, InnerPred;
+ Value *LHS, *RHS;
+
+ // Match lowering of @llvm.vector.reduce.and. Turn
+ /// %vec_ne = icmp ne <8 x i8> %lhs, %rhs
+ /// %scalar_ne = bitcast <8 x i1> %vec_ne to i8
+ /// %res = icmp <pred> i8 %scalar_ne, 0
+ ///
+ /// into
+ ///
+ /// %lhs.scalar = bitcast <8 x i8> %lhs to i64
+ /// %rhs.scalar = bitcast <8 x i8> %rhs to i64
+ /// %res = icmp <pred> i64 %lhs.scalar, %rhs.scalar
+ ///
+ /// for <pred> in {ne, eq}.
+ if (!match(&I, m_ICmp(OuterPred,
+ m_OneUse(m_BitCast(m_OneUse(
+ m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
+ m_Zero())))
+ return nullptr;
+ auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
+ if (!LHSTy || !LHSTy->getElementType()->isIntegerTy())
+ return nullptr;
+ unsigned NumBits =
+ LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
+ // TODO: Relax this to "not wider than max legal integer type"?
+ if (!DL.isLegalInteger(NumBits))
+ return nullptr;
+
+ if (ICmpInst::isEquality(OuterPred) && InnerPred == ICmpInst::ICMP_NE) {
+ auto *ScalarTy = Builder.getIntNTy(NumBits);
+ LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
+ RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
+ return ICmpInst::Create(Instruction::ICmp, OuterPred, LHS, RHS,
+ I.getName());
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6124,6 +6173,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInvariantGroup(I))
return Res;
+ if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
+ return Res;
+
return Changed ? &I : nullptr;
}
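A source-level sketch of the reduction idiom foldReductionIdiom targets; whether the vectorizer emits exactly the icmp + bitcast + icmp chain shown in the comment above depends on target and flags:

  #include <cstdint>

  bool allEqual8(const uint8_t *A, const uint8_t *B) {
    bool Eq = true;
    for (int I = 0; I < 8; ++I) // may vectorize into icmp ne <8 x i8> + reduce
      Eq &= (A[I] == B[I]);
    return Eq;                  // the fold can reduce this to one i64 compare
  }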
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 30f6aab2114b..09694d50468f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -46,8 +46,8 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
// will be inefficient.
assert(!isa<CallInst>(Inst));
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = cast<Instruction>(V);
Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc());
}
}
@@ -138,8 +138,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
return nullptr;
SmallVector<Value *, 4> AvailablePtrVals;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- Value *Arg = PN.getIncomingValue(i);
+ for (auto Incoming : zip(PN.blocks(), PN.incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *Arg = std::get<1>(Incoming);
// First look backward:
if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) {
@@ -151,8 +152,8 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
Value *ArgIntToPtr = nullptr;
for (User *U : Arg->users()) {
if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() &&
- (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) ||
- cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) {
+ (DT.dominates(cast<Instruction>(U), BB) ||
+ cast<Instruction>(U)->getParent() == BB)) {
ArgIntToPtr = U;
break;
}
@@ -190,26 +191,21 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
"Not enough available ptr typed incoming values");
PHINode *MatchingPtrPHI = nullptr;
unsigned NumPhis = 0;
- for (auto II = BB->begin(); II != BB->end(); II++, NumPhis++) {
+ for (PHINode &PtrPHI : BB->phis()) {
// FIXME: consider handling this in AggressiveInstCombine
- PHINode *PtrPHI = dyn_cast<PHINode>(II);
- if (!PtrPHI)
- break;
- if (NumPhis > MaxNumPhis)
+ if (NumPhis++ > MaxNumPhis)
return nullptr;
- if (PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType())
+ if (&PtrPHI == &PN || PtrPHI.getType() != IntToPtr->getType())
continue;
- MatchingPtrPHI = PtrPHI;
- for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) {
- if (AvailablePtrVals[i] !=
- PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) {
- MatchingPtrPHI = nullptr;
- break;
- }
- }
-
- if (MatchingPtrPHI)
- break;
+ if (any_of(zip(PN.blocks(), AvailablePtrVals),
+ [&](const auto &BlockAndValue) {
+ BasicBlock *BB = std::get<0>(BlockAndValue);
+ Value *V = std::get<1>(BlockAndValue);
+ return PtrPHI.getIncomingValueForBlock(BB) != V;
+ }))
+ continue;
+ MatchingPtrPHI = &PtrPHI;
+ break;
}
if (MatchingPtrPHI) {
@@ -250,9 +246,9 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
InsertNewInstBefore(NewPtrPHI, PN);
SmallDenseMap<Value *, Instruction *> Casts;
- for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) {
- auto *IncomingBB = PN.getIncomingBlock(i);
- auto *IncomingVal = AvailablePtrVals[i];
+ for (auto Incoming : zip(PN.blocks(), AvailablePtrVals)) {
+ auto *IncomingBB = std::get<0>(Incoming);
+ auto *IncomingVal = std::get<1>(Incoming);
if (IncomingVal->getType() == IntToPtr->getType()) {
NewPtrPHI->addIncoming(IncomingVal, IncomingBB);
@@ -330,8 +326,8 @@ InstCombinerImpl::foldPHIArgInsertValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `insertvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<InsertValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<InsertValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstIVI->getIndices())
return nullptr;
}
@@ -370,8 +366,8 @@ InstCombinerImpl::foldPHIArgExtractValueInstructionIntoPHI(PHINode &PN) {
// Scan to see if all operands are `extractvalue`'s with the same indices,
// and all have a single use.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- auto *I = dyn_cast<ExtractValueInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ auto *I = dyn_cast<ExtractValueInst>(V);
if (!I || !I->hasOneUser() || I->getIndices() != FirstEVI->getIndices() ||
I->getAggregateOperand()->getType() !=
FirstEVI->getAggregateOperand()->getType())
@@ -412,8 +408,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
Type *RHSType = RHSVal->getType();
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || I->getOpcode() != Opc || !I->hasOneUser() ||
// Verify type of the LHS matches so we don't fold cmp's of different
// types.
@@ -461,15 +457,17 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *InInst = cast<Instruction>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ Instruction *InInst = cast<Instruction>(InVal);
if (NewLHS) {
Value *NewInLHS = InInst->getOperand(0);
- NewLHS->addIncoming(NewInLHS, PN.getIncomingBlock(i));
+ NewLHS->addIncoming(NewInLHS, InBB);
}
if (NewRHS) {
Value *NewInRHS = InInst->getOperand(1);
- NewRHS->addIncoming(NewInRHS, PN.getIncomingBlock(i));
+ NewRHS->addIncoming(NewInRHS, InBB);
}
}
}
@@ -487,8 +485,8 @@ Instruction *InstCombinerImpl::foldPHIArgBinOpIntoPHI(PHINode &PN) {
NewBinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- NewBinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ NewBinOp->andIRFlags(V);
PHIArgMergedDebugLoc(NewBinOp, PN);
return NewBinOp;
@@ -511,9 +509,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
bool AllInBounds = true;
// Scan to see if all operands are the same opcode, and all have one user.
- for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
- GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP || !GEP->hasOneUser() || GEP->getType() != FirstInst->getType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
return nullptr;
@@ -527,8 +524,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
AllBasePointersAreAllocas = false;
// Compare the operand lists.
- for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
- if (FirstInst->getOperand(op) == GEP->getOperand(op))
+ for (unsigned Op = 0, E = FirstInst->getNumOperands(); Op != E; ++Op) {
+ if (FirstInst->getOperand(Op) == GEP->getOperand(Op))
continue;
// Don't merge two GEPs when two operands differ (introducing phi nodes)
@@ -536,11 +533,12 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// substantially cheaper to compute for the constants, so making it a
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
- isa<ConstantInt>(GEP->getOperand(op)))
+ if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
+ isa<ConstantInt>(GEP->getOperand(Op)))
return nullptr;
- if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
+ if (FirstInst->getOperand(Op)->getType() !=
+ GEP->getOperand(Op)->getType())
return nullptr;
// If we already needed a PHI for an earlier operand, and another operand
@@ -550,7 +548,7 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
if (NeededPhi)
return nullptr;
- FixedOperands[op] = nullptr; // Needs a PHI.
+ FixedOperands[Op] = nullptr; // Needs a PHI.
NeededPhi = true;
}
}
@@ -569,29 +567,30 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
bool HasAnyPHIs = false;
- for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
- if (FixedOperands[i]) continue; // operand doesn't need a phi.
- Value *FirstOp = FirstInst->getOperand(i);
- PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
- FirstOp->getName()+".pn");
+ for (unsigned I = 0, E = FixedOperands.size(); I != E; ++I) {
+ if (FixedOperands[I])
+ continue; // operand doesn't need a phi.
+ Value *FirstOp = FirstInst->getOperand(I);
+ PHINode *NewPN =
+ PHINode::Create(FirstOp->getType(), E, FirstOp->getName() + ".pn");
InsertNewInstBefore(NewPN, PN);
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
- OperandPhis[i] = NewPN;
- FixedOperands[i] = NewPN;
+ OperandPhis[I] = NewPN;
+ FixedOperands[I] = NewPN;
HasAnyPHIs = true;
}
-
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
- BasicBlock *InBB = PN.getIncomingBlock(i);
-
- for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
- if (PHINode *OpPhi = OperandPhis[op])
- OpPhi->addIncoming(InGEP->getOperand(op), InBB);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ GetElementPtrInst *InGEP = cast<GetElementPtrInst>(InVal);
+
+ for (unsigned Op = 0, E = OperandPhis.size(); Op != E; ++Op)
+ if (PHINode *OpPhi = OperandPhis[Op])
+ OpPhi->addIncoming(InGEP->getOperand(Op), InBB);
}
}
@@ -627,18 +626,18 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
- bool isAddressTaken = false;
+ bool IsAddressTaken = false;
for (User *U : AI->users()) {
if (isa<LoadInst>(U)) continue;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If storing TO the alloca, then the address isn't taken.
if (SI->getOperand(1) == AI) continue;
}
- isAddressTaken = true;
+ IsAddressTaken = true;
break;
}
- if (!isAddressTaken && AI->isStaticAlloca())
+ if (!IsAddressTaken && AI->isStaticAlloca())
return false;
}
@@ -665,9 +664,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is.
- bool isVolatile = FirstLI->isVolatile();
+ bool IsVolatile = FirstLI->isVolatile();
Align LoadAlignment = FirstLI->getAlign();
- unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
+ const unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
@@ -678,22 +677,25 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (IsVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
- // Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
- if (!LI || !LI->hasOneUser())
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *InBB = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
+ LoadInst *LI = dyn_cast<LoadInst>(InVal);
+ if (!LI || !LI->hasOneUser() || LI->isAtomic())
+ return nullptr;
+
+ // Make sure all arguments are the same type of operation.
+ if (LI->isVolatile() != IsVolatile ||
+ LI->getPointerAddressSpace() != LoadAddrSpace)
return nullptr;
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
- if (LI->isVolatile() != isVolatile ||
- LI->getParent() != PN.getIncomingBlock(i) ||
- LI->getPointerAddressSpace() != LoadAddrSpace ||
- !isSafeAndProfitableToSinkLoad(LI))
+ if (LI->getParent() != InBB || !isSafeAndProfitableToSinkLoad(LI))
return nullptr;
LoadAlignment = std::min(LoadAlignment, LI->getAlign());
@@ -701,8 +703,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
- LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ if (IsVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return nullptr;
}
@@ -715,7 +716,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
LoadInst *NewLI =
- new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
+ new LoadInst(FirstLI->getType(), NewPN, "", IsVolatile, LoadAlignment);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
@@ -734,13 +735,15 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
NewLI->setMetadata(ID, FirstLI->getMetadata(ID));
// Add all operands to the new PHI and combine TBAA metadata.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- LoadInst *LI = cast<LoadInst>(PN.getIncomingValue(i));
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ LoadInst *LI = cast<LoadInst>(V);
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
if (InVal) {
@@ -755,7 +758,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
- if (isVolatile)
+ if (IsVolatile)
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
@@ -830,8 +833,8 @@ Instruction *InstCombinerImpl::foldPHIArgZextsIntoPHI(PHINode &Phi) {
// operands, and zext the result back to the original type.
PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues,
Phi.getName() + ".shrunk");
- for (unsigned i = 0; i != NumIncomingValues; ++i)
- NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i));
+ for (unsigned I = 0; I != NumIncomingValues; ++I)
+ NewPhi->addIncoming(NewIncoming[I], Phi.getIncomingBlock(I));
InsertNewInstBefore(NewPhi, Phi);
return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType());
@@ -885,13 +888,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
// Check to see if all arguments are the same operation.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values())) {
+ Instruction *I = dyn_cast<Instruction>(V);
if (!I || !I->hasOneUser() || !I->isSameOperationAs(FirstInst))
return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
- return nullptr; // Cast operation must match.
+ return nullptr; // Cast operation must match.
} else if (I->getOperand(1) != ConstantOp) {
return nullptr;
}
@@ -907,11 +910,13 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
// Add all operands to the new PHI.
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
- Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
+ for (auto Incoming : drop_begin(zip(PN.blocks(), PN.incoming_values()))) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ Value *NewInVal = cast<Instruction>(V)->getOperand(0);
if (NewInVal != InVal)
InVal = nullptr;
- NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
+ NewPN->addIncoming(NewInVal, BB);
}
Value *PhiVal;
@@ -937,8 +942,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
BinOp->copyIRFlags(PN.getIncomingValue(0));
- for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
- BinOp->andIRFlags(PN.getIncomingValue(i));
+ for (Value *V : drop_begin(PN.incoming_values()))
+ BinOp->andIRFlags(V);
PHIArgMergedDebugLoc(BinOp, PN);
return BinOp;
@@ -952,8 +957,8 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
}
/// Return true if this PHI node is only used by a PHI node cycle that is dead.
-static bool DeadPHICycle(PHINode *PN,
- SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
+static bool isDeadPHICycle(PHINode *PN,
+ SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {
if (PN->use_empty()) return true;
if (!PN->hasOneUse()) return false;
@@ -966,7 +971,7 @@ static bool DeadPHICycle(PHINode *PN,
return false;
if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
- return DeadPHICycle(PU, PotentiallyDeadPHIs);
+ return isDeadPHICycle(PU, PotentiallyDeadPHIs);
return false;
}
@@ -999,7 +1004,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
/// Return an existing non-zero constant if this phi node has one, otherwise
/// return constant 1.
-static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
+static ConstantInt *getAnyNonZeroConstInt(PHINode &PN) {
assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
for (Value *V : PN.operands())
if (auto *ConstVA = dyn_cast<ConstantInt>(V))
@@ -1014,8 +1019,8 @@ struct PHIUsageRecord {
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
- PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
- : PHIId(pn), Shift(Sh), Inst(User) {}
+ PHIUsageRecord(unsigned Pn, unsigned Sh, Instruction *User)
+ : PHIId(Pn), Shift(Sh), Inst(User) {}
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
@@ -1032,12 +1037,11 @@ struct LoweredPHIRecord {
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
- LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
- : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh, Type *Ty)
+ : PN(Phi), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
// Ctor form used by DenseMap.
- LoweredPHIRecord(PHINode *pn, unsigned Sh)
- : PN(pn), Shift(Sh), Width(0) {}
+ LoweredPHIRecord(PHINode *Phi, unsigned Sh) : PN(Phi), Shift(Sh), Width(0) {}
};
} // namespace
@@ -1093,10 +1097,13 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// input is defined in the predecessor, then we won't be able to split the
// critical edge, which is required to insert a truncate. Because of this, we
// have to bail out.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (!II) continue;
- if (II->getParent() != PN->getIncomingBlock(i))
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *BB = std::get<0>(Incoming);
+ Value *V = std::get<1>(Incoming);
+ InvokeInst *II = dyn_cast<InvokeInst>(V);
+ if (!II)
+ continue;
+ if (II->getParent() != BB)
continue;
// If we have a phi, and if it's directly in the predecessor, then we have
@@ -1146,8 +1153,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
LLVM_DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) dbgs()
- << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';);
+ for (unsigned I = 1; I != PHIsToSlice.size(); ++I) dbgs()
+ << "AND USER PHI #" << I << ": " << *PHIsToSlice[I] << '\n');
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -1175,8 +1182,9 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = PN->getIncomingBlock(i);
+ for (auto Incoming : zip(PN->blocks(), PN->incoming_values())) {
+ BasicBlock *Pred = std::get<0>(Incoming);
+ Value *InVal = std::get<1>(Incoming);
Value *&PredVal = PredValues[Pred];
// If we already have a value for this predecessor, reuse it.
@@ -1186,7 +1194,6 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
// Handle the PHI self-reuse case.
- Value *InVal = PN->getIncomingValue(i);
if (InVal == PN) {
PredVal = EltPHI;
EltPHI->addIncoming(PredVal, Pred);
@@ -1207,8 +1214,8 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Builder.SetInsertPoint(Pred->getTerminator());
Value *Res = InVal;
if (Offset)
- Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
- Offset), "extract");
+ Res = Builder.CreateLShr(
+ Res, ConstantInt::get(InVal->getType(), Offset), "extract");
Res = Builder.CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
@@ -1217,12 +1224,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
// needed piece.
- if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHINode *OldInVal = dyn_cast<PHINode>(InVal))
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId =
find(PHIsToSlice, OldInVal) - PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
- cast<Instruction>(Res)));
+ PHIUsers.push_back(
+ PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res)));
++UserE;
}
}
@@ -1240,12 +1247,12 @@ Instruction *InstCombinerImpl::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with poison.
Value *Poison = PoisonValue::get(FirstPhi.getType());
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- replaceInstUsesWith(*PHIsToSlice[i], Poison);
+ for (PHINode *PHI : drop_begin(PHIsToSlice))
+ replaceInstUsesWith(*PHI, Poison);
return replaceInstUsesWith(FirstPhi, Poison);
}
-static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
+static Value *simplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
const DominatorTree &DT) {
// Simplify the following patterns:
// if (cond)
@@ -1302,8 +1309,8 @@ static Value *SimplifyUsingControlFlow(InstCombiner &Self, PHINode &PN,
DT.dominates(FalseOutEdge, FalseIncEdge))
// This Phi is actually equivalent to branching condition of IDom.
return Cond;
- else if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
- DT.dominates(FalseOutEdge, TrueIncEdge)) {
+ if (DT.dominates(TrueOutEdge, FalseIncEdge) &&
+ DT.dominates(FalseOutEdge, TrueIncEdge)) {
// This Phi is actually opposite to branching condition of IDom. We invert
// the condition that will potentially open up some opportunities for
// sinking.
@@ -1369,7 +1376,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
PotentiallyDeadPHIs.insert(&PN);
- if (DeadPHICycle(PU, PotentiallyDeadPHIs))
+ if (isDeadPHICycle(PU, PotentiallyDeadPHIs))
return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
}
@@ -1398,15 +1405,15 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
match(CmpInst->getOperand(1), m_Zero())) {
ConstantInt *NonZeroConst = nullptr;
bool MadeChange = false;
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator();
- Value *VA = PN.getIncomingValue(i);
+ for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
+ Instruction *CtxI = PN.getIncomingBlock(I)->getTerminator();
+ Value *VA = PN.getIncomingValue(I);
if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) {
if (!NonZeroConst)
- NonZeroConst = GetAnyNonZeroConstInt(PN);
+ NonZeroConst = getAnyNonZeroConstInt(PN);
if (NonZeroConst != VA) {
- replaceOperand(PN, i, NonZeroConst);
+ replaceOperand(PN, I, NonZeroConst);
MadeChange = true;
}
}
@@ -1457,17 +1464,17 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// however.
PHINode *FirstPN = cast<PHINode>(PN.getParent()->begin());
if (&PN != FirstPN)
- for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *BBA = PN.getIncomingBlock(i);
- BasicBlock *BBB = FirstPN->getIncomingBlock(i);
+ for (unsigned I = 0, E = FirstPN->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *BBA = PN.getIncomingBlock(I);
+ BasicBlock *BBB = FirstPN->getIncomingBlock(I);
if (BBA != BBB) {
- Value *VA = PN.getIncomingValue(i);
- unsigned j = PN.getBasicBlockIndex(BBB);
- Value *VB = PN.getIncomingValue(j);
- PN.setIncomingBlock(i, BBB);
- PN.setIncomingValue(i, VB);
- PN.setIncomingBlock(j, BBA);
- PN.setIncomingValue(j, VA);
+ Value *VA = PN.getIncomingValue(I);
+ unsigned J = PN.getBasicBlockIndex(BBB);
+ Value *VB = PN.getIncomingValue(J);
+ PN.setIncomingBlock(I, BBB);
+ PN.setIncomingValue(I, VB);
+ PN.setIncomingBlock(J, BBA);
+ PN.setIncomingValue(J, VA);
// NOTE: Instcombine normally would want us to "return &PN" if we
// modified any of the operands of an instruction. However, since we
// aren't adding or removing uses (just rearranging them) we don't do
@@ -1500,7 +1507,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
return Res;
// Ultimately, try to replace this Phi with a dominating condition.
- if (auto *V = SimplifyUsingControlFlow(*this, PN, DT))
+ if (auto *V = simplifyUsingControlFlow(*this, PN, DT))
return replaceInstUsesWith(PN, V);
return nullptr;
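A sketch of the control-flow pattern simplifyUsingControlFlow recognizes: a phi whose incoming values merely restate the dominating branch condition:

  bool selectFlag(bool Cond) {
    bool R;
    if (Cond)
      R = true;  // incoming value on the true edge
    else
      R = false; // incoming value on the false edge
    return R;    // the phi for R is equivalent to Cond itself
  }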
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 71a5ae24eead..3f064cfda712 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1219,7 +1219,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
I != E; I++)
if (I.isStruct())
- return true;;
+ return true;
return false;
};
if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
@@ -1228,10 +1228,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Conservatively track the demanded elements back through any vector
// operands we may have. We know there must be at least one, or we
// wouldn't have a vector result to get here. Note that we intentionally
- // merge the undef bits here since gepping with either an undef base or
- // index results in undef.
+ // merge the undef bits here since gepping with either a poison base or
+ // index results in poison.
for (unsigned i = 0; i < I->getNumOperands(); i++) {
- if (match(I->getOperand(i), m_Undef())) {
+ if (i == 0 ? match(I->getOperand(i), m_Undef())
+ : match(I->getOperand(i), m_Poison())) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
return nullptr;
@@ -1239,7 +1240,11 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (I->getOperand(i)->getType()->isVectorTy()) {
APInt UndefEltsOp(VWidth, 0);
simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
- UndefElts |= UndefEltsOp;
+ // gep(x, undef) is not undef, so skip considering index operands here.
+ // Note that we could propagate poison, but we can't distinguish between
+ // undef and poison bits at the moment.
+ if (i == 0)
+ UndefElts |= UndefEltsOp;
}
}
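A minimal standalone sketch (hypothetical helper, not the LLVM API) of the operand split introduced above: only the base operand of a GEP merges its undef elements into the result, since gep(x, undef) is not undef:

  #include <cstdint>
  #include <vector>

  // One undef-element bitmask per GEP operand; returns the merged result mask.
  uint64_t gepUndefElts(const std::vector<uint64_t> &OpUndef) {
    uint64_t Undef = 0;
    for (size_t I = 0; I < OpUndef.size(); ++I)
      if (I == 0) // index operands (I > 0) no longer contribute
        Undef |= OpUndef[I];
    return Undef;
  }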
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 029be5257694..3091905ca534 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6e72255e51ae..8f94172a6402 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1527,22 +1527,22 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(LI, SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(LI, XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
@@ -1556,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
auto BasePtr = CI->getOperand(OpOffset);
- if (ignoreAccess(LI, BasePtr))
+ if (ignoreAccess(I, BasePtr))
return;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
@@ -1568,7 +1568,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
} else {
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(LI, CI->getArgOperand(ArgNo)))
+ ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
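The LI-to-I changes above fix a scoping pitfall: a variable declared in an if-condition remains in scope, but null, in the later branches. A self-contained sketch of the bug class, using hypothetical mini-IR types:

  #include <cstdio>

  struct Inst { int Kind; };
  Inst *asLoad(Inst *I) { return I->Kind == 0 ? I : nullptr; }
  void check(Inst *Ctx) { std::printf("ctx: %p\n", (void *)Ctx); }

  void visit(Inst *I) {
    if (Inst *LI = asLoad(I)) {
      check(LI); // fine: LI == I in this branch
    } else {
      check(LI); // compiles, but LI is always null here; pass I instead
    }
  }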
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index fb10a99d1338..7b3741d19a1b 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -304,6 +304,7 @@ public:
static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
const DominatorTree &DT);
bool instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1359,6 +1360,7 @@ bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
}
bool HWAddressSanitizer::instrumentStack(
+ bool ShouldDetectUseAfterScope,
MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
@@ -1410,7 +1412,7 @@ bool HWAddressSanitizer::instrumentStack(
};
bool StandardLifetime =
UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
- if (DetectUseAfterScope && StandardLifetime) {
+ if (ShouldDetectUseAfterScope && StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
IRB.SetInsertPoint(Start->getNextNode());
tagAlloca(IRB, AI, Tag, Size);
@@ -1505,8 +1507,14 @@ bool HWAddressSanitizer::sanitizeFunction(
SmallVector<Instruction *, 8> LandingPadVec;
SmallVector<Instruction *, 4> UnrecognizedLifetimes;
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
+ bool CallsReturnTwice = false;
for (auto &BB : F) {
for (auto &Inst : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+ if (CI->canReturnTwice()) {
+ CallsReturnTwice = true;
+ }
+ }
if (InstrumentStack) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isInterestingAlloca(*AI))
@@ -1531,9 +1539,14 @@ bool HWAddressSanitizer::sanitizeFunction(
}
}
- if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
- isa<CleanupReturnInst>(Inst))
+ if (isa<ReturnInst>(Inst)) {
+ if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
+ RetVec.push_back(CI);
+ else
+ RetVec.push_back(&Inst);
+ } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
RetVec.push_back(&Inst);
+ }
if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
for (Value *V : DVI->location_ops()) {
@@ -1585,7 +1598,12 @@ bool HWAddressSanitizer::sanitizeFunction(
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
+ // Calls to functions that may return twice (e.g. setjmp) confuse the
+ // postdominator analysis and would lead us to keep memory tagged after
+ // function return. Work around this by always untagging at every return
+ // statement if returns_twice functions are called.
+ instrumentStack(DetectUseAfterScope && !CallsReturnTwice,
+ AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
RetVec, StackTag, GetDT, GetPDT);
}
// Pad and align each of the allocas that we instrumented to stop small
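A sketch of why returns_twice calls force untagging on every return: a longjmp back through setjmp bypasses the paths the post-dominator analysis reasons about, so stack tags could otherwise outlive the frame:

  #include <csetjmp>

  std::jmp_buf Env;
  int compute() { std::longjmp(Env, 1); } // jumps back to setjmp in the sketch

  int frame() {
    int Local = 0;        // stack slot HWASan tags on entry
    if (setjmp(Env) == 0)
      Local = compute();
    return Local;         // untag unconditionally once setjmp is involved
  }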
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index ab179b03dd29..6868408ef5f5 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -456,6 +456,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
lowerIncrement(IPI);
MadeChange = true;
+ } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
+ lowerCover(IPC);
+ MadeChange = true;
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(IPVP);
MadeChange = true;
@@ -539,7 +542,8 @@ static bool containsProfilingIntrinsics(Module &M) {
return !F->use_empty();
return false;
};
- return containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
+ return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
+ containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
@@ -689,47 +693,58 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Ind->eraseFromParent();
}
-void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
- GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
-
- IRBuilder<> Builder(Inc);
- uint64_t Index = Inc->getIndex()->getZExtValue();
- Value *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(),
- Counters, 0, Index);
-
- if (isRuntimeCounterRelocationEnabled()) {
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
- Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
- Function *Fn = Inc->getParent()->getParent();
- Instruction &I = Fn->getEntryBlock().front();
- LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (!LI) {
- IRBuilder<> Builder(&I);
- GlobalVariable *Bias =
- M->getGlobalVariable(getInstrProfCounterBiasVarName());
- if (!Bias) {
- // Compiler must define this variable when runtime counter relocation
- // is being used. Runtime has a weak external reference that is used
- // to check whether that's the case or not.
- Bias = new GlobalVariable(
- *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
- Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
- Bias->setVisibility(GlobalVariable::HiddenVisibility);
- // A definition that's weak (linkonce_odr) without being in a COMDAT
- // section wouldn't lead to link errors, but it would lead to a dead
- // data word from every TU but one. Putting it in COMDAT ensures there
- // will be exactly one data slot in the link.
- if (TT.supportsCOMDAT())
- Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
- }
- LI = Builder.CreateLoad(Int64Ty, Bias);
+Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
+ auto *Counters = getOrCreateRegionCounters(I);
+ IRBuilder<> Builder(I);
+
+ auto *Addr = Builder.CreateConstInBoundsGEP2_32(
+ Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
+
+ if (!isRuntimeCounterRelocationEnabled())
+ return Addr;
+
+ Type *Int64Ty = Type::getInt64Ty(M->getContext());
+ Function *Fn = I->getParent()->getParent();
+ Instruction &EntryI = Fn->getEntryBlock().front();
+ LoadInst *LI = dyn_cast<LoadInst>(&EntryI);
+ if (!LI) {
+ IRBuilder<> EntryBuilder(&EntryI);
+ auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ if (!Bias) {
+ // Compiler must define this variable when runtime counter relocation
+ // is being used. Runtime has a weak external reference that is used
+ // to check whether that's the case or not.
+ Bias = new GlobalVariable(
+ *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
+ Bias->setVisibility(GlobalVariable::HiddenVisibility);
+ // A definition that's weak (linkonce_odr) without being in a COMDAT
+ // section wouldn't lead to link errors, but it would lead to a dead
+ // data word from every TU but one. Putting it in COMDAT ensures there
+ // will be exactly one data slot in the link.
+ if (TT.supportsCOMDAT())
+ Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
}
- auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
- Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
+ LI = EntryBuilder.CreateLoad(Int64Ty, Bias);
}
+ auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
+ return Builder.CreateIntToPtr(Add, Addr->getType());
+}
+
+void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
+ auto *Addr = getCounterAddress(CoverInstruction);
+ IRBuilder<> Builder(CoverInstruction);
+ // We store zero to represent that this block is covered.
+ Builder.CreateStore(Builder.getInt8(0), Addr);
+ CoverInstruction->eraseFromParent();
+}
+
+void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+ auto *Addr = getCounterAddress(Inc);
+ IRBuilder<> Builder(Inc);
if (Options.Atomic || AtomicCounterUpdateAll ||
- (Index == 0 && AtomicFirstCounter)) {
+ (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
MaybeAlign(), AtomicOrdering::Monotonic);
} else {
@@ -849,6 +864,31 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
}
GlobalVariable *
+InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
+ GlobalValue::LinkageTypes Linkage) {
+ uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+ auto &Ctx = M->getContext();
+ GlobalVariable *GV;
+ if (isa<InstrProfCoverInst>(Inc)) {
+ auto *CounterTy = Type::getInt8Ty(Ctx);
+ auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
+ // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
+ std::vector<Constant *> InitialValues(NumCounters,
+ Constant::getAllOnesValue(CounterTy));
+ GV = new GlobalVariable(*M, CounterArrTy, false, Linkage,
+ ConstantArray::get(CounterArrTy, InitialValues),
+ Name);
+ GV->setAlignment(Align(1));
+ } else {
+ auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+ GV = new GlobalVariable(*M, CounterTy, false, Linkage,
+ Constant::getNullValue(CounterTy), Name);
+ GV->setAlignment(Align(8));
+ }
+ return GV;
+}
+
+GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
GlobalVariable *NamePtr = Inc->getName();
auto &PD = ProfileDataMap[NamePtr];
@@ -914,16 +954,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
- ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
- // Create the counters variable.
- auto *CounterPtr =
- new GlobalVariable(*M, CounterTy, false, Linkage,
- Constant::getNullValue(CounterTy), CntsVarName);
+ auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
- CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
PD.RegionCounters = CounterPtr;
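A conceptual model (not the real profiling runtime) of the new single-byte coverage counters created above: each counter byte starts as all-ones, and the lowered llvm.instrprof.cover stores zero when its block executes:

  #include <cstdint>
  #include <cstdio>

  uint8_t Counter[1] = {0xFF};                 // mirrors the getAllOnesValue init
  void instrumentedEntry() { Counter[0] = 0; } // mirrors the lowered store

  int main() {
    instrumentedEntry();
    std::printf("covered: %s\n", Counter[0] == 0 ? "yes" : "no");
    return 0;
  }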
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 8fedefccf0e1..5e078f2c4212 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index cfe993dedbc2..c51acdf52f14 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -182,6 +182,7 @@
#include "llvm/IR/ValueMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1718,11 +1719,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Figure out maximal valid memcpy alignment.
const Align ArgAlign = DL.getValueOrABITypeAlignment(
MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
- Value *CpShadowPtr =
+ Value *CpShadowPtr, *CpOriginPtr;
+ std::tie(CpShadowPtr, CpOriginPtr) =
getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
- /*isStore*/ true)
- .first;
- // TODO(glider): need to copy origins.
+ /*isStore*/ true);
if (!PropagateShadow || Overflow) {
// ParamTLS overflow.
EntryIRB.CreateMemSet(
@@ -1735,6 +1735,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CopyAlign, Size);
LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
(void)Cpy;
+
+ if (MS.TrackOrigins) {
+ Value *OriginPtr =
+ getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ EntryIRB.CreateMemCpy(
+ CpOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ OriginSize);
+ }
}
}
@@ -3701,7 +3714,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertShadowCheck(A, &CB);
Size = DL.getTypeAllocSize(A->getType());
} else {
- bool ArgIsInitialized = false;
Value *Store = nullptr;
// Compute the Shadow for arg even if it is ByVal, because
// in that case getShadow() will copy the actual arg shadow to
@@ -3722,10 +3734,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MaybeAlign Alignment = llvm::None;
if (ParamAlignment)
Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr =
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ false)
- .first;
+ /*isStore*/ false);
if (!PropagateShadow) {
Store = IRB.CreateMemSet(ArgShadowBase,
Constant::getNullValue(IRB.getInt8Ty()),
@@ -3733,6 +3745,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
Alignment, Size);
+ if (MS.TrackOrigins) {
+ Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
+ // FIXME: OriginSize should be:
+ // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
+ unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
+ IRB.CreateMemCpy(
+ ArgOriginBase,
+ /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
+ AOriginPtr,
+ /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
+ }
}
} else {
// Any other parameters mean we need bit-grained tracking of uninit
@@ -3743,12 +3766,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
Constant *Cst = dyn_cast<Constant>(ArgShadow);
- if (Cst && Cst->isNullValue())
- ArgIsInitialized = true;
+ if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ }
}
- if (MS.TrackOrigins && !ArgIsInitialized)
- IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
assert(Store != nullptr);
LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
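A small sketch of the OriginSize computation used in both hunks above; the 4-byte granularity stands in for MSan's kMinOriginAlignment (value assumed here for illustration):

  #include <cstdint>

  constexpr uint64_t kMinOriginAlignment = 4;

  uint64_t originCopySize(uint64_t Size) {
    // Round up to origin granularity; per the FIXME, the argument's start
    // offset within a 4-byte cell is not yet folded in.
    return (Size + kMinOriginAlignment - 1) & ~(kMinOriginAlignment - 1);
  }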
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index c46415e5b1f4..0902a94452e3 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -255,6 +255,11 @@ static cl::opt<bool> PGOInstrumentEntry(
"pgo-instrument-entry", cl::init(false), cl::Hidden,
cl::desc("Force to instrument function entry basicblock."));
+static cl::opt<bool> PGOFunctionEntryCoverage(
+ "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc(
+ "Use this option to enable function entry coverage instrumentation."));
+
static cl::opt<bool>
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
cl::desc("Fix function entry count in profile use."));
@@ -337,6 +342,33 @@ static const char *ValueProfKindDescr[] = {
#include "llvm/ProfileData/InstrProfData.inc"
};
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+ const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
+ Type *IntTy64 = Type::getInt64Ty(M.getContext());
+ uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
+ if (IsCS)
+ ProfileVersion |= VARIANT_MASK_CSIR_PROF;
+ if (PGOInstrumentEntry)
+ ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+ if (DebugInfoCorrelate)
+ ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
+ if (PGOFunctionEntryCoverage)
+ ProfileVersion |=
+ VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+ auto IRLevelVersionVariable = new GlobalVariable(
+ M, IntTy64, true, GlobalValue::WeakAnyLinkage,
+ Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
+ IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
+ IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
+ }
+ return IRLevelVersionVariable;
+}
+
namespace {
/// The select instruction visitor plays three roles specified
@@ -469,9 +501,7 @@ private:
createProfileFileNameVar(M, InstrProfileOutput);
// The variable in a comdat may be discarded by LTO. Ensure the
// declaration will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return false;
}
std::string InstrProfileOutput;
@@ -914,22 +944,39 @@ static void instrumentOneFunc(
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
+ auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
+ FuncInfo.FunctionHash);
+ if (PGOFunctionEntryCoverage) {
+ assert(!IsCS &&
+           "entry coverage does not support context-sensitive instrumentation");
+ auto &EntryBB = F.getEntryBlock();
+ IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+ // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
+ {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
+ return;
+ }
+
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
- Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
+ // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
- {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
- Builder.getInt32(I++)});
+ {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}
// Now instrument select instructions:
@@ -1502,6 +1549,8 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
}
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+ if (PGOFunctionEntryCoverage)
+ return;
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
@@ -1622,8 +1671,7 @@ static bool InstrumentAllFunctions(
 // For the context-sensitive instrumentation, we should have a separate pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry,
- DebugInfoCorrelate);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1645,9 +1693,7 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
- appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true,
- PGOInstrumentEntry,
- DebugInfoCorrelate));
+ appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
return PreservedAnalyses::all();
}
@@ -1844,6 +1890,18 @@ static bool annotateAllFunctions(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
}
+ if (PGOReader->hasSingleByteCoverage()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Cannot use coverage profiles for optimization"));
+ return false;
+ }
+ if (PGOReader->functionEntryOnly()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(),
+ "Function entry profiles are not yet supported for optimization"));
+ return false;
+ }
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the
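The version word written by createIRLevelProfileFlagVar is a plain bitwise composition. A rough standalone model follows; the base version and the mask bit positions are illustrative assumptions, the authoritative definitions live in llvm/ProfileData/InstrProfData.inc:

#include <cstdint>
#include <cstdio>

constexpr uint64_t RawVersion            = 8;          // assumed base version
constexpr uint64_t MaskIRProf            = 1ULL << 56; // assumed bit layout
constexpr uint64_t MaskCSIRProf          = 1ULL << 57;
constexpr uint64_t MaskInstrEntry        = 1ULL << 58;
constexpr uint64_t MaskByteCoverage      = 1ULL << 60;
constexpr uint64_t MaskFunctionEntryOnly = 1ULL << 61;

uint64_t profileVersion(bool IsCS, bool InstrumentEntry, bool EntryCoverage) {
  uint64_t V = RawVersion | MaskIRProf;
  if (IsCS)
    V |= MaskCSIRProf;
  if (InstrumentEntry)
    V |= MaskInstrEntry;
  // Entry coverage implies both single-byte counters and entry-only counts,
  // matching the two masks OR'd together in the patch.
  if (EntryCoverage)
    V |= MaskByteCoverage | MaskFunctionEntryOnly;
  return V;
}

int main() {
  printf("%#llx\n", (unsigned long long)profileVersion(false, false, true));
}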
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 1ca6ddabac5b..126845bb3308 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -123,20 +123,9 @@ BundledRetainClaimRVs::~BundledRetainClaimRVs() {
// can't be tail calls.
if (auto *CI = dyn_cast<CallInst>(CB))
CI->setTailCallKind(CallInst::TCK_NoTail);
-
- if (UseMarker) {
- // Remove the retainRV/claimRV function operand from the operand bundle
- // to reflect the fact that the backend is responsible for emitting only
- // the marker instruction, but not the retainRV/claimRV call.
- OperandBundleDef OB("clang.arc.attachedcall", None);
- auto *NewCB = CallBase::Create(CB, OB, CB);
- CB->replaceAllUsesWith(NewCB);
- CB->eraseFromParent();
- }
}
- if (!ContractPass || !UseMarker)
- EraseInstruction(P.first);
+ EraseInstruction(P.first);
}
RVCalls.clear();
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 2b47bec7ffe8..62f88a8cc02b 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -105,8 +105,7 @@ CallInst *createCallInstWithColors(
class BundledRetainClaimRVs {
public:
- BundledRetainClaimRVs(bool ContractPass, bool UseMarker)
- : ContractPass(ContractPass), UseMarker(UseMarker) {}
+ BundledRetainClaimRVs(bool ContractPass) : ContractPass(ContractPass) {}
~BundledRetainClaimRVs();
/// Insert a retainRV/claimRV call to the normal destination blocks of invokes
@@ -156,9 +155,6 @@ private:
DenseMap<CallInst *, CallBase *> RVCalls;
bool ContractPass;
-
- /// Indicates whether the target uses a special inline-asm marker.
- bool UseMarker;
};
} // end namespace objcarc
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 9e2832827686..2985ae004d3c 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -434,23 +434,20 @@ bool ObjCARCContract::tryToPeepholeInstruction(
LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::UnsafeClaimRV: {
- bool IsInstContainedInBundle = BundledInsts->contains(Inst);
-
- // Return now if the target doesn't need a special inline-asm marker. Return
- // true if this is a bundled retainRV/claimRV call, which is going to be
- // erased at the end of this pass, to avoid undoing objc-arc-expand and
+ // Return true if this is a bundled retainRV/claimRV call, which is always
+ // redundant with the attachedcall in the bundle, and is going to be erased
+ // at the end of this pass. This avoids undoing objc-arc-expand and
// replacing uses of the retainRV/claimRV call's argument with its result.
- if (!RVInstMarker)
- return IsInstContainedInBundle;
-
- // The target needs a special inline-asm marker.
+ if (BundledInsts->contains(Inst))
+ return true;
- // We don't have to emit the marker if this is a bundled call since the
- // backend is responsible for emitting it. Return false to undo
- // objc-arc-expand.
- if (IsInstContainedInBundle)
+ // If this isn't a bundled call, and the target doesn't need a special
+ // inline-asm marker, we're done: return now, and undo objc-arc-expand.
+ if (!RVInstMarker)
return false;
+ // The target needs a special inline-asm marker. Insert it.
+
BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
@@ -548,7 +545,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
AA = A;
DT = D;
PA.setAA(A);
- BundledRetainClaimRVs BRV(true, RVInstMarker);
+ BundledRetainClaimRVs BRV(/*ContractPass=*/true);
BundledInsts = &BRV;
std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT);
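The reordered peephole reduces to a three-way decision. A toy model, with plain booleans standing in for BundledInsts->contains(Inst) and RVInstMarker:

#include <cassert>

enum class Action { EraseWithBundle, UndoExpand, InsertMarker };

Action classify(bool IsBundled, bool TargetHasMarker) {
  if (IsBundled)
    return Action::EraseWithBundle; // redundant with the attachedcall bundle
  if (!TargetHasMarker)
    return Action::UndoExpand;      // nothing to emit; undo objc-arc-expand
  return Action::InsertMarker;      // emit the special inline-asm marker
}

int main() {
  assert(classify(true, true) == Action::EraseWithBundle);
  assert(classify(true, false) == Action::EraseWithBundle);
  assert(classify(false, false) == Action::UndoExpand);
  assert(classify(false, true) == Action::InsertMarker);
}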
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index b6dc97f1e43f..e1a000b31cf9 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2459,7 +2459,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) {
return false;
Changed = CFGChanged = false;
- BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent()));
+ BundledRetainClaimRVs BRV(/*ContractPass=*/false);
BundledInsts = &BRV;
LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index dda1a2f08076..143a78f604fc 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -357,7 +357,7 @@ typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
// This map keeps track of all the new definitions for an instruction. This
// information is needed when restoring SSA form after cloning blocks.
-typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap;
+typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
OS << "< ";
@@ -1126,6 +1126,9 @@ private:
/// Add new value mappings to the DefMap to keep track of all new definitions
/// for a particular instruction. These will be used while updating SSA form.
void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
+ SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
+ NewDefsVector.reserve(VMap.size());
+
for (auto Entry : VMap) {
Instruction *Inst =
dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
@@ -1138,11 +1141,18 @@ private:
if (!Cloned)
continue;
- if (NewDefs.find(Inst) == NewDefs.end())
- NewDefs[Inst] = {Cloned};
- else
- NewDefs[Inst].push_back(Cloned);
+ NewDefsVector.push_back({Inst, Cloned});
}
+
+ // Sort the defs to get deterministic insertion order into NewDefs.
+ sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
+ if (LHS.first == RHS.first)
+ return LHS.second->comesBefore(RHS.second);
+ return LHS.first->comesBefore(RHS.first);
+ });
+
+ for (const auto &KV : NewDefsVector)
+ NewDefs[KV.first].push_back(KV.second);
}
/// Update the last branch of a particular cloned path to point to the correct
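The determinism fix follows a common pattern: buffer the pairs, sort by program order, then fill an insertion-ordered map. A self-contained sketch, with integer "positions" standing in for Instruction::comesBefore:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // (instruction position, clone position) pairs, gathered in hash order.
  std::vector<std::pair<int, int>> NewDefsVector = {{2, 5}, {1, 9}, {2, 3}};

  // Sort by instruction, then by clone, like the patch's comparator.
  std::sort(NewDefsVector.begin(), NewDefsVector.end());

  // MapVector analogue: iteration order is insertion order, so after the
  // sort every later walk over NewDefs is deterministic.
  std::vector<std::pair<int, std::vector<int>>> NewDefs;
  for (auto [Inst, Clone] : NewDefsVector) {
    if (NewDefs.empty() || NewDefs.back().first != Inst)
      NewDefs.push_back({Inst, {}});
    NewDefs.back().second.push_back(Clone);
  }
  assert(NewDefs.size() == 2);
  assert(NewDefs[1].second == std::vector<int>({3, 5}));
}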
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index ca19913e37ee..bf4d275e04ba 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -192,6 +192,7 @@ struct FusionCandidate {
GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
+ assert(DT && "Expected non-null DT!");
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
    // the memory reads and writes. If any instructions that prevent fusion are
@@ -767,7 +768,7 @@ private:
LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
<< " iterations of the first loop. \n");
- FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
+ FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true);
if (FC0.Peeled) {
LLVM_DEBUG(dbgs() << "Done Peeling\n");
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 35ba4e2b4032..318c4c06f0f7 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1172,8 +1172,15 @@ bool LoopIdiomRecognize::processLoopStridedStore(
CallInst *NewCall;
if (SplatValue) {
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
- MaybeAlign(StoreAlignment));
+ AAMDNodes AATags = TheStore->getAAMetadata();
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
+ NewCall = Builder.CreateMemSet(
+ BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
} else {
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1452,17 +1459,28 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ AAMDNodes AATags = TheLoad->getAAMetadata();
+ AAMDNodes StoreAATags = TheStore->getAAMetadata();
+ AATags = AATags.merge(StoreAATags);
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
CallInst *NewCall = nullptr;
// Check whether to generate an unordered atomic memcpy:
// If the load or store are atomic, then they must necessarily be unordered
// by previous checks.
if (!TheStore->isAtomic() && !TheLoad->isAtomic()) {
if (UseMemMove)
- NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall = Builder.CreateMemMove(
+ StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,
+ /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
else
- NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr,
- LoadAlign, NumBytes);
+ NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,
+ NumBytes, /*isVolatile=*/false, AATags.TBAA,
+ AATags.TBAAStruct, AATags.Scope, AATags.NoAlias);
} else {
// For now don't support unordered atomic memmove.
if (UseMemMove)
@@ -1486,7 +1504,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
- NumBytes, StoreSize);
+ NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
+ AATags.NoAlias);
}
NewCall->setDebugLoc(TheStore->getDebugLoc());
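The length passed to extendTo is the subtle part: a constant byte count extends the AA tags to exactly that range, while a runtime count falls back to -1, i.e. an unknown extent. A toy stand-in for that decision (the struct and names here are not the LLVM API):

#include <cassert>
#include <cstdint>
#include <optional>

struct Tags {
  std::optional<uint64_t> ExtentBytes; // nullopt once the extent is unknown
};

Tags extendTo(Tags T, int64_t Len) {
  if (Len < 0)
    T.ExtentBytes.reset(); // -1: conservatively unknown extent
  else
    T.ExtentBytes = static_cast<uint64_t>(Len);
  return T;
}

Tags forLoopIdiom(Tags StoreTags, std::optional<uint64_t> ConstNumBytes) {
  return ConstNumBytes ? extendTo(StoreTags, (int64_t)*ConstNumBytes)
                       : extendTo(StoreTags, -1);
}

int main() {
  Tags T{std::nullopt};
  assert(forLoopIdiom(T, 64).ExtentBytes == 64);      // constant NumBytes
  assert(!forLoopIdiom(T, std::nullopt).ExtentBytes); // runtime NumBytes
}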
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 728d63fe2847..d3fcba10c275 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -468,7 +468,7 @@ private:
LI.removeBlock(BB);
}
- DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+ detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
DTU.applyUpdates(DTUpdates);
DTUpdates.clear();
for (auto *BB : DeadLoopBlocks)
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 022d9c7abc8c..9beb2281cf0f 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1281,7 +1281,7 @@ static LoopUnrollResult tryToUnrollLoop(
<< " iterations";
});
- if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
+ if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA)) {
simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 8f1d0181ee5b..296becb31e8f 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1339,16 +1339,21 @@ public:
// Copy load operand to new alloca.
Builder.SetInsertPoint(Copy, Copy->begin());
- AllocaInst *NewLd =
- Builder.CreateAlloca(Load->getType(), Load->getPointerAddressSpace());
- Builder.CreateMemCpy(NewLd, NewLd->getAlign(),
- Load->getPointerOperand(), Load->getAlign(),
- LoadLoc.Size.getValue());
+ auto *VT = cast<FixedVectorType>(Load->getType());
+ // Use an array type for the alloca, to avoid potentially huge alignment
+ // requirements for large vector types.
+ auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
+ AllocaInst *Alloca =
+ Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());
+ Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo());
+
+ Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(),
+ Load->getAlign(), LoadLoc.Size.getValue());
Builder.SetInsertPoint(Fusion, Fusion->begin());
PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
PHI->addIncoming(Load->getPointerOperand(), Check0);
PHI->addIncoming(Load->getPointerOperand(), Check1);
- PHI->addIncoming(NewLd, Copy);
+ PHI->addIncoming(BC, Copy);
// Adjust DT.
DTUpdates.push_back({DT->Insert, Check0, Check1});
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 2476e6c408b1..f35c9212a6f9 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -77,6 +77,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -1736,18 +1737,18 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
if (Filtered.empty()) {
    // If it has undef or poison at this point, it means there are no non-undef
// arguments, and thus, the value of the phi node must be undef.
- if (HasPoison && !HasUndef) {
- LLVM_DEBUG(
- dbgs() << "PHI Node " << *I
- << " has no non-poison arguments, valuing it as poison\n");
- return createConstantExpression(PoisonValue::get(I->getType()));
- }
if (HasUndef) {
LLVM_DEBUG(
dbgs() << "PHI Node " << *I
<< " has no non-undef arguments, valuing it as undef\n");
return createConstantExpression(UndefValue::get(I->getType()));
}
+ if (HasPoison) {
+ LLVM_DEBUG(
+ dbgs() << "PHI Node " << *I
+ << " has no non-poison arguments, valuing it as poison\n");
+ return createConstantExpression(PoisonValue::get(I->getType()));
+ }
LLVM_DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
@@ -1757,6 +1758,11 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [&](Value *Arg) { return Arg == AllSameValue; })) {
+ // Can't fold phi(undef, X) -> X unless X can't be poison (thus X is undef
+ // in the worst case).
+ if (HasUndef && !isGuaranteedNotToBePoison(AllSameValue, AC, nullptr, DT))
+ return E;
+
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
// on the original phi node), especially in weird CFG's where some arguments
@@ -1764,8 +1770,8 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
- // We also special case undef, so that if we have an undef, we can't use the
- // common value unless it dominates the phi block.
+ // We also special case undef/poison, so that if we have an undef, we can't
+ // use the common value unless it dominates the phi block.
if (HasPoison || HasUndef) {
// If we have undef and at least one other value, this is really a
// multivalued phi, and we need to know if it's cycle free in order to
@@ -2853,14 +2859,14 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
}
// The algorithm initially places the values of the routine in the TOP
-// congruence class. The leader of TOP is the undetermined value `undef`.
+// congruence class. The leader of TOP is the undetermined value `poison`.
// When the algorithm has finished, values still in TOP are unreachable.
void NewGVN::initializeCongruenceClasses(Function &F) {
NextCongruenceNum = 0;
// Note that even though we use the live on entry def as a representative
// MemoryAccess, it is *not* the same as the actual live on entry def. We
- // have no real equivalemnt to undef for MemoryAccesses, and so we really
+ // have no real equivalent to poison for MemoryAccesses, and so we really
// should be checking whether the MemoryAccess is top if we want to know if it
// is equivalent to everything. Otherwise, what this really signifies is that
// the access "it reaches all the way back to the beginning of the function"
@@ -3031,7 +3037,7 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
!isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
- // If all that is left is nothing, our memoryphi is undef. We keep it as
+ // If all that is left is nothing, our memoryphi is poison. We keep it as
// InitialClass. Note: The only case this should happen is if we have at
// least one self-argument.
if (Filtered.begin() == Filtered.end()) {
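The new guard encodes a one-way refinement rule: undef may later be refined to any value, but replacing phi(undef, X) with X is only sound when X cannot be poison, since poison cannot be weakened back to undef. A toy model of the precondition:

#include <cassert>

enum class Lattice { Concrete, Undef, Poison }; // simplified value kinds

// phi(undef, X) may fold to X only when X is guaranteed not to be poison;
// this mirrors the isGuaranteedNotToBePoison check added above.
bool canFoldPhiOfUndef(Lattice CommonArg) {
  return CommonArg != Lattice::Poison;
}

int main() {
  assert(canFoldPhiOfUndef(Lattice::Concrete));
  assert(canFoldPhiOfUndef(Lattice::Undef));
  assert(!canFoldPhiOfUndef(Lattice::Poison));
}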
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3da367341d2a..b795ad3899bc 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -258,6 +258,7 @@ struct GCPtrLivenessData {
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
+using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
@@ -266,9 +267,6 @@ struct PartiallyConstructedSafepointRecord {
/// The set of values known to be live across this safepoint
StatepointLiveSetTy LiveSet;
- /// Mapping from live pointers to a base-defining-value
- MapVector<Value *, Value *> PointerToBase;
-
/// The *new* gc.statepoint instruction itself. This produces the token
/// that normal path gc.relocates and the gc.result are tied to.
GCStatepointInst *StatepointToken;
@@ -1255,10 +1253,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
-static void
-findBasePointers(const StatepointLiveSetTy &live,
- MapVector<Value *, Value *> &PointerToBase,
- DominatorTree *DT, DefiningValueMapTy &DVCache) {
+static void findBasePointers(const StatepointLiveSetTy &live,
+ PointerToBaseTy &PointerToBase, DominatorTree *DT,
+ DefiningValueMapTy &DVCache) {
for (Value *ptr : live) {
Value *base = findBasePointer(ptr, DVCache);
assert(base && "failed to find base pointer");
@@ -1274,8 +1271,8 @@ findBasePointers(const StatepointLiveSetTy &live,
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result) {
- MapVector<Value *, Value *> PointerToBase;
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
  // optimization, we can use this to avoid separately materializing the base
@@ -1290,37 +1287,27 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
PointerToBase[V] = V;
}
findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache);
-
- if (PrintBasePointers) {
- errs() << "Base Pairs (w/o Relocation):\n";
- for (auto &Pair : PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";;
- }
- }
-
- result.PointerToBase = PointerToBase;
}
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &result);
+ PartiallyConstructedSafepointRecord &result,
+ PointerToBaseTy &PointerToBase);
static void recomputeLiveInValues(
Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
- MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
+ PointerToBaseTy &PointerToBase) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
computeLiveInValues(DT, F, RevisedLivenessData);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
+ recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info,
+ PointerToBase);
}
}
@@ -1537,7 +1524,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
assert(BasePtrs.size() == LiveVariables.size());
// Then go ahead and use the builder do actually do the inserts. We insert
@@ -1626,10 +1614,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
auto &Context = Call->getContext();
auto &DL = Call->getModule()->getDataLayout();
auto GetBaseAndOffset = [&](Value *Derived) {
- assert(Result.PointerToBase.count(Derived));
+ assert(PointerToBase.count(Derived));
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
- Value *Base = Result.PointerToBase.find(Derived)->second;
+ Value *Base = PointerToBase.find(Derived)->second;
Value *Base_int = Builder.CreatePtrToInt(
Base, Type::getIntNTy(Context, IntPtrSize));
Value *Derived_int = Builder.CreatePtrToInt(
@@ -1819,9 +1807,9 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
- std::vector<DeferredReplacement> &Replacements) {
+ std::vector<DeferredReplacement> &Replacements,
+ const PointerToBaseTy &PointerToBase) {
const auto &LiveSet = Result.LiveSet;
- const auto &PointerToBase = Result.PointerToBase;
// Convert to vector for efficient cross referencing.
SmallVector<Value *, 64> BaseVec, LiveVec;
@@ -1836,7 +1824,8 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
assert(LiveVec.size() == BaseVec.size());
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
+ makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
+ PointerToBase);
}
// Helper function for the relocationViaAlloca.
@@ -2238,6 +2227,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
// relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase,
TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
@@ -2248,7 +2238,7 @@ static void rematerializeLiveValues(CallBase *Call,
for (Value *LiveValue: Info.LiveSet) {
// For each live pointer find its defining chain
SmallVector<Instruction *, 3> ChainToBase;
- assert(Info.PointerToBase.count(LiveValue));
+ assert(PointerToBase.count(LiveValue));
Value *RootOfChain =
findRematerializableChainToBasePointer(ChainToBase,
LiveValue);
@@ -2260,9 +2250,9 @@ static void rematerializeLiveValues(CallBase *Call,
// Handle the scenario where the RootOfChain is not equal to the
// Base Value, but they are essentially the same phi values.
- if (RootOfChain != Info.PointerToBase[LiveValue]) {
+ if (RootOfChain != PointerToBase[LiveValue]) {
PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
- PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[LiveValue]);
if (!OrigRootPhi || !AlternateRootPhi)
continue;
// PHI nodes that have the same incoming values, and belonging to the same
@@ -2362,7 +2352,7 @@ static void rematerializeLiveValues(CallBase *Call,
Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
- InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ InsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
auto *Invoke = cast<InvokeInst>(Call);
@@ -2373,9 +2363,9 @@ static void rematerializeLiveValues(CallBase *Call,
&*Invoke->getUnwindDest()->getFirstInsertionPt();
Instruction *NormalRematerializedValue = rematerializeChain(
- NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ NormalInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Instruction *UnwindRematerializedValue = rematerializeChain(
- UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ UnwindInsertBefore, RootOfChain, PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2491,10 +2481,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// site.
findLiveReferences(F, DT, ToUpdate, Records);
+ /// Global mapping from live pointers to a base-defining-value.
+ PointerToBaseTy PointerToBase;
+
// B) Find the base pointers for each live pointer
for (size_t i = 0; i < Records.size(); i++) {
PartiallyConstructedSafepointRecord &info = Records[i];
- findBasePointers(DT, DVCache, ToUpdate[i], info);
+ findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase);
+ }
+ if (PrintBasePointers) {
+ errs() << "Base Pairs (w/o Relocation):\n";
+ for (auto &Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+      errs() << "\n";
+ }
}
// The base phi insertion logic (for any safepoint) may have inserted new
@@ -2515,8 +2519,10 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
PartiallyConstructedSafepointRecord &Info = Records[i];
SmallVector<Value *, 128> Bases;
- for (auto Pair : Info.PointerToBase)
- Bases.push_back(Pair.second);
+ for (auto *Derived : Info.LiveSet) {
+ assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
+ Bases.push_back(PointerToBase[Derived]);
+ }
insertUseHolderAfter(ToUpdate[i], Bases, Holders);
}
@@ -2524,18 +2530,16 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, ToUpdate, Records);
+ recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase);
if (PrintBasePointers) {
- for (auto &Info : Records) {
- errs() << "Base Pairs: (w/Relocation)\n";
- for (auto Pair : Info.PointerToBase) {
- errs() << " derived ";
- Pair.first->printAsOperand(errs(), false);
- errs() << " base ";
- Pair.second->printAsOperand(errs(), false);
- errs() << "\n";
- }
+ errs() << "Base Pairs: (w/Relocation)\n";
+ for (auto Pair : PointerToBase) {
+ errs() << " derived ";
+ Pair.first->printAsOperand(errs(), false);
+ errs() << " base ";
+ Pair.second->printAsOperand(errs(), false);
+ errs() << "\n";
}
}
@@ -2547,10 +2551,12 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Note that the relocation placement code relies on this filtering for
// correctness as it expects the base to be in the liveset, which isn't true
// if the base is constant.
- for (auto &Info : Records)
- for (auto &BasePair : Info.PointerToBase)
- if (isa<Constant>(BasePair.second))
- Info.LiveSet.remove(BasePair.first);
+ for (auto &Info : Records) {
+ Info.LiveSet.remove_if([&](Value *LiveV) {
+ assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
+ return isa<Constant>(PointerToBase[LiveV]);
+ });
+ }
for (CallInst *CI : Holders)
CI->eraseFromParent();
@@ -2561,7 +2567,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// some values instead of relocating them. This is purely an optimization and
// does not influence correctness.
for (size_t i = 0; i < Records.size(); i++)
- rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
+ rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase, TTI);
// We need this to safely RAUW and delete call or invoke return values that
// may themselves be live over a statepoint. For details, please see usage in
@@ -2575,7 +2581,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
  // previous statepoint cannot be a live variable, thus we can remove
// the old statepoint calls as we go.)
for (size_t i = 0; i < Records.size(); i++)
- makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+ makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
+ PointerToBase);
ToUpdate.clear(); // prevent accident use of invalid calls.
@@ -2594,8 +2601,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// these live sets, and migrate to using that data structure from this point
// onward.
Info.LiveSet.clear();
- Info.PointerToBase.clear();
}
+ PointerToBase.clear();
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> Live;
@@ -3115,35 +3122,15 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
- PartiallyConstructedSafepointRecord &Info) {
+ PartiallyConstructedSafepointRecord &Info,
+ PointerToBaseTy &PointerToBase) {
StatepointLiveSetTy Updated;
findLiveSetAtInst(Call, RevisedLivenessData, Updated);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
for (auto V : Updated)
- Info.PointerToBase.insert({V, V});
-
-#ifndef NDEBUG
- for (auto V : Updated)
- assert(Info.PointerToBase.count(V) &&
- "Must be able to find base for live value!");
-#endif
-
- // Remove any stale base mappings - this can happen since our liveness is
- // more precise then the one inherent in the base pointer analysis.
- DenseSet<Value *> ToErase;
- for (auto KVPair : Info.PointerToBase)
- if (!Updated.count(KVPair.first))
- ToErase.insert(KVPair.first);
-
- for (auto *V : ToErase)
- Info.PointerToBase.erase(V);
-
-#ifndef NDEBUG
- for (auto KVPair : Info.PointerToBase)
- assert(Updated.count(KVPair.first) && "record for non-live value");
-#endif
+    PointerToBase.insert({V, V});
Info.LiveSet = Updated;
}
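The remove_if rewrite over the live set is the standard erase-by-predicate idiom, available in portable C++20 as std::erase_if. A small analogue, with strings standing in for derived pointers and bases:

#include <cassert>
#include <map>
#include <set>
#include <string>

int main() {
  std::set<std::string> LiveSet = {"d1", "d2", "d3"};
  std::map<std::string, std::string> PointerToBase = {
      {"d1", "base"}, {"d2", "CONST"}, {"d3", "base"}};

  // Drop live values whose base is a constant, asserting that every live
  // value has a recorded base, as the patch does.
  std::erase_if(LiveSet, [&](const std::string &V) {
    auto It = PointerToBase.find(V);
    assert(It != PointerToBase.end() && "Missed base for derived pointer");
    return It->second == "CONST";
  });
  assert(LiveSet == std::set<std::string>({"d1", "d3"}));
}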
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 35497ae5ed9a..8be8946702be 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index ac580b4161f4..b3a445368537 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -276,6 +276,8 @@ class StructurizeCFG {
void insertConditions(bool Loops);
+ void simplifyConditions();
+
void delPhiValues(BasicBlock *From, BasicBlock *To);
void addPhiValues(BasicBlock *From, BasicBlock *To);
@@ -586,6 +588,28 @@ void StructurizeCFG::insertConditions(bool Loops) {
}
}
+/// Simplify any inverted conditions that were built by buildConditions.
+void StructurizeCFG::simplifyConditions() {
+ SmallVector<Instruction *> InstToErase;
+ for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
+ auto &Preds = I.second;
+ for (auto &J : Preds) {
+ auto &Cond = J.second;
+ Instruction *Inverted;
+ if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
+ !Cond->use_empty()) {
+ if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
+ InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
+ Cond->replaceAllUsesWith(InvertedCmp);
+ InstToErase.push_back(cast<Instruction>(Cond));
+ }
+ }
+ }
+ }
+ for (auto *I : InstToErase)
+ I->eraseFromParent();
+}
+
/// Remove all PHI values coming from "From" into "To" and remember
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
@@ -1065,6 +1089,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
createFlow();
insertConditions(false);
insertConditions(true);
+ simplifyConditions();
setPhiValues();
simplifyAffectedPhis();
rebuildSSA();
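simplifyConditions is double-negation elimination on compares: when a one-use compare feeds a not, flip the compare's predicate and drop the not. A standalone sketch of that rewrite:

#include <cassert>

enum Pred { LT, GE, EQ, NE }; // a tiny subset of comparison predicates

Pred inversePredicate(Pred P) {
  switch (P) {
  case LT: return GE;
  case GE: return LT;
  case EQ: return NE;
  case NE: return EQ;
  }
  return P;
}

struct Cmp {
  Pred P;
  bool HasOneUse;
};

// Mirrors the m_Not(m_OneUse(m_Instruction(...))) pattern above: rewrite
// !(a < b) into (a >= b) in place when the compare has a single use.
bool simplifyNotOfCmp(bool CondIsNot, Cmp &C) {
  if (!CondIsNot || !C.HasOneUse)
    return false;
  C.P = inversePredicate(C.P);
  return true;
}

int main() {
  Cmp C{LT, true};
  assert(simplifyNotOfCmp(true, C) && C.P == GE);
}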
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d6d6b1a7fa09..15c4a64eb794 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -59,7 +59,7 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
"is followed by a block that either has a terminating "
"deoptimizing call or is terminated with an unreachable"));
-void llvm::DetatchDeadBlocks(
+void llvm::detachDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
bool KeepOneInputPHIs) {
@@ -110,7 +110,7 @@ void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
#endif
SmallVector<DominatorTree::UpdateType, 4> Updates;
- DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+ detachDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
if (DTU)
DTU->applyUpdates(Updates);
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 048e691e33cf..86413df664a0 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -694,38 +694,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
VMap[OrigV] = I;
}
+ // Simplify conditional branches and switches with a constant operand. We try
+ // to prune these out when cloning, but if the simplification required
+ // looking through PHI nodes, those are only available after forming the full
+ // basic block. That may leave some here, and we still want to prune the dead
+ // code as early as possible.
+ Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ ConstantFoldTerminator(&BB);
+
+ // Some blocks may have become unreachable as a result. Find and delete them.
+ {
+ SmallPtrSet<BasicBlock *, 16> ReachableBlocks;
+ SmallVector<BasicBlock *, 16> Worklist;
+ Worklist.push_back(&*Begin);
+ while (!Worklist.empty()) {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (ReachableBlocks.insert(BB).second)
+ append_range(Worklist, successors(BB));
+ }
+
+ SmallVector<BasicBlock *, 16> UnreachableBlocks;
+ for (BasicBlock &BB : make_range(Begin, NewFunc->end()))
+ if (!ReachableBlocks.contains(&BB))
+ UnreachableBlocks.push_back(&BB);
+ DeleteDeadBlocks(UnreachableBlocks);
+ }
+
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- //
- // Do the folding before we check if the block is dead since we want code
- // like
- // bb:
- // br i1 undef, label %bb, label %bb
- // to be simplified to
- // bb:
- // br label %bb
- // before we call I->getSinglePredecessor().
- ConstantFoldTerminator(&*I);
-
- // Check if this block has become dead during inlining or other
- // simplifications. Note that the first block will appear dead, as it has
- // not yet been wired up properly.
- if (I != Begin && (pred_empty(&*I) || I->getSinglePredecessor() == &*I)) {
- BasicBlock *DeadBB = &*I++;
- DeleteDeadBlock(DeadBB);
- continue;
- }
-
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) {
++I;
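The replacement dead-code sweep is a plain reachability walk from the cloned entry block; unlike the old per-block check, it needs no special-casing of self-loops or of the not-yet-wired first block. A standalone sketch over a toy CFG:

#include <cassert>
#include <vector>

int main() {
  // Adjacency list: block 0 -> 1, block 1 -> 1 (self loop), block 2 -> 1.
  // Block 2 lost its only predecessor when a terminator was folded.
  std::vector<std::vector<int>> Succs = {{1}, {1}, {1}};

  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Worklist = {0}; // the cloned entry block
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (Reachable[BB])
      continue;
    Reachable[BB] = true;
    for (int S : Succs[BB])
      Worklist.push_back(S);
  }

  std::vector<int> UnreachableBlocks;
  for (int BB = 0; BB != (int)Succs.size(); ++BB)
    if (!Reachable[BB])
      UnreachableBlocks.push_back(BB); // fed to DeleteDeadBlocks in the patch
  assert(UnreachableBlocks == std::vector<int>({2}));
}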
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 24cd5747c5a4..cec159f6a448 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -857,8 +858,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
(ParamTy.size() + AggParamTy.size()) ==
(inputs.size() + outputs.size()) &&
"Number of scalar and aggregate params does not match inputs, outputs");
- assert(StructValues.empty() ||
- AggregateArgs && "Expeced StructValues only with AggregateArgs set");
+ assert((StructValues.empty() || AggregateArgs) &&
+         "Expected StructValues only with AggregateArgs set");
// Concatenate scalar and aggregate params in ParamTy.
size_t NumScalarParams = ParamTy.size();
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index f8ec8c6ad426..c1c5f5cc879f 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -65,15 +65,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
for (const Use &U : V->uses()) {
const User *UR = U.getUser();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType()))
- return true;
-
- // FIXME: Do we need to add constexpr selects to VisitedUsers?
- if (analyzeGlobalAux(CE, GS, VisitedUsers))
- return true;
+ if (const Constant *C = dyn_cast<Constant>(UR)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE && isa<PointerType>(CE->getType())) {
+ // Recursively analyze pointer-typed constant expressions.
+ // FIXME: Do we need to add constexpr selects to VisitedUsers?
+ if (analyzeGlobalAux(CE, GS, VisitedUsers))
+ return true;
+ } else {
+ // Ignore dead constant users.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ }
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
@@ -169,10 +172,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(UR)) {
- // We might have a dead and dangling constant hanging off of here.
- if (!isSafeToDestroyConstant(C))
- return true;
} else {
// Otherwise must be some other user.
return true;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index c9f872f5b7e1..923bcc781e47 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -671,12 +672,9 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
// edge from this block.
SmallVector<Value *, 8> UnwindDestPHIValues;
BasicBlock *InvokeBB = II->getParent();
- for (Instruction &I : *UnwindDest) {
+ for (PHINode &PHI : UnwindDest->phis()) {
// Save the value to use for this edge.
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
+ UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
}
// Add incoming-PHI values to the unwind destination block for the given basic
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 9f33d2f82732..9a10535c9310 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 92333408aaef..5b66da1e7082 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -737,7 +737,7 @@ TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences(
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
bool PreserveLCSSA) {
assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
@@ -756,23 +756,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// routes which can lead to the exit: we can reach it from the peeled
// iterations too.
DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
- if (DT) {
- for (auto *BB : L->blocks()) {
- auto *BBDomNode = DT->getNode(BB);
- SmallVector<BasicBlock *, 16> ChildrenToUpdate;
- for (auto *ChildDomNode : BBDomNode->children()) {
- auto *ChildBB = ChildDomNode->getBlock();
- if (!L->contains(ChildBB))
- ChildrenToUpdate.push_back(ChildBB);
- }
- // The new idom of the block will be the nearest common dominator
- // of all copies of the previous idom. This is equivalent to the
- // nearest common dominator of the previous idom and the first latch,
- // which dominates all copies of the previous idom.
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
- for (auto *ChildBB : ChildrenToUpdate)
- NonLoopBlocksIDom[ChildBB] = NewIDom;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT.getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
}
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
}
Function *F = Header->getParent();
@@ -822,11 +820,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// If (cond) goto Header
// Exit:
- BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
BasicBlock *InsertBot =
- SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+ SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
BasicBlock *NewPreHeader =
- SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertTop->setName(Header->getName() + ".peel.begin");
InsertBot->setName(Header->getName() + ".peel.next");
@@ -852,23 +850,21 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ValueToValueMapTy VMap;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI,
+ LoopBlocks, VMap, LVMap, &DT, LI,
LoopLocalNoAliasDeclScopes);
// Remap to use values from the current iteration instead of the
// previous one.
remapInstructionsInBlocks(NewBlocks, VMap);
- if (DT) {
- // Update IDoms of the blocks reachable through exits.
- if (Iter == 0)
- for (auto BBIDom : NonLoopBlocksIDom)
- DT->changeImmediateDominator(BBIDom.first,
- cast<BasicBlock>(LVMap[BBIDom.second]));
+ // Update IDoms of the blocks reachable through exits.
+ if (Iter == 0)
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT.changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif
- }
auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
@@ -877,7 +873,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
InsertTop = InsertBot;
- InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
InsertBot->setName(Header->getName() + ".peel.next");
F->getBasicBlockList().splice(InsertTop->getIterator(),
@@ -912,10 +908,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SE->forgetTopmostLoop(L);
  // Finally, DomTree must be correct.
- assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
// FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
+ simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 7c9ab7f6ca2c..d6a6be2762c7 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -264,3 +264,16 @@ void VFABI::setVectorVariantNames(
CI->addFnAttr(
Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
}
+
+void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
+ StringRef SectionName) {
+ // Embed the buffer into the module.
+ Constant *ModuleConstant = ConstantDataArray::get(
+ M.getContext(), makeArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
+ GlobalVariable *GV = new GlobalVariable(
+ M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
+ ModuleConstant, "llvm.embedded.object");
+ GV->setSection(SectionName);
+
+ appendToCompilerUsed(M, GV);
+}
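A minimal caller sketch for the new helper, assuming an LLVM development setup; the section name is a placeholder, not something this patch prescribes:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <memory>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Any raw bytes work; a short string stands in for an object blob here.
  std::unique_ptr<MemoryBuffer> Buf =
      MemoryBuffer::getMemBuffer("payload-bytes", "embedded",
                                 /*RequiresNullTerminator=*/false);
  embedBufferInModule(M, Buf->getMemBufferRef(), ".hypothetical.section");
  // M now carries the private global @llvm.embedded.object in that section,
  // kept alive through llvm.compiler.used.
}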
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index 7083789267d9..deaee467531d 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index b35ab57e0d87..01b433b4782a 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,13 +25,13 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 3ca36a1cad91..43eb5c87acee 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -16,6 +16,7 @@
#include "llvm-c/Transforms/Utils.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index bbe6b3dc23b3..637181722f63 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -2,6 +2,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d11f4146b590..3290439ecd07 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -632,13 +632,6 @@ protected:
Instruction *EntryVal, VPValue *Def,
VPTransformState &State);
- /// Returns true if an instruction \p I should be scalarized instead of
- /// vectorized for the chosen vectorization factor.
- bool shouldScalarizeInstruction(Instruction *I) const;
-
- /// Returns true if we should generate a scalar version of \p IV.
- bool needsScalarInduction(Instruction *IV) const;
-
/// Returns (and creates if needed) the original loop trip count.
Value *getOrCreateTripCount(Loop *NewLoop);
@@ -2479,21 +2472,6 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
-bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
- return Cost->isScalarAfterVectorization(I, VF) ||
- Cost->isProfitableToScalarize(I, VF);
-}
-
-bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
- if (shouldScalarizeInstruction(IV))
- return true;
- auto isScalarInst = [&](User *U) -> bool {
- auto *I = cast<Instruction>(U);
- return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
- };
- return llvm::any_of(IV->users(), isScalarInst);
-}
-
void InnerLoopVectorizer::widenIntOrFpInduction(
PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
Value *CanonicalIV) {
@@ -2549,27 +2527,6 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return ScalarIV;
};
- // Create the vector values from the scalar IV, in the absence of creating a
- // vector IV.
- auto CreateSplatIV = [&](Value *ScalarIV, Value *Step) {
- Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx;
- if (Step->getType()->isFloatingPointTy())
- StartIdx =
- getRuntimeVFAsFloat(Builder, Step->getType(), State.VF * Part);
- else
- StartIdx = getRuntimeVF(Builder, Step->getType(), State.VF * Part);
-
- Value *EntryPart =
- getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode(),
- State.VF, State.Builder);
- State.set(Def, EntryPart, Part);
- if (Trunc)
- addMetadata(EntryPart, Trunc);
- }
- };
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
@@ -2605,36 +2562,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
return;
}
- // Determine if we want a scalar version of the induction variable. This is
- // true if the induction variable itself is not widened, or if it has at
- // least one user in the loop that is not widened.
- auto NeedsScalarIV = needsScalarInduction(EntryVal);
- if (!NeedsScalarIV) {
+ // Create a new independent vector induction variable, if one is needed.
+ if (Def->needsVectorIV())
createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- return;
- }
- // Try to create a new independent vector induction variable. If we can't
- // create the phi node, we will splat the scalar induction variable in each
- // loop iteration.
- if (!shouldScalarizeInstruction(EntryVal)) {
- createVectorIntOrFpInductionPHI(ID, Step, Start, EntryVal, Def, State);
- Value *ScalarIV = CreateScalarIV(Step);
+ if (Def->needsScalarIV()) {
// Create scalar steps that can be used by instructions we will later
// scalarize. Note that the addition of the scalar steps will not increase
// the number of instructions in the loop in the common case prior to
// InstCombine. We will be trading one vector extract for each scalar step.
+ Value *ScalarIV = CreateScalarIV(Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
- return;
}
-
- // All IV users are scalar instructions, so only emit a scalar IV, not a
- // vectorised IV. Except when we tail-fold, then the splat IV feeds the
- // predicate used by the masked loads/stores.
- Value *ScalarIV = CreateScalarIV(Step);
- if (!Cost->isScalarEpilogueAllowed())
- CreateSplatIV(ScalarIV, Step);
- buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, State);
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2663,17 +2602,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
// Determine the number of scalars we need to generate for each unroll
- // iteration. If EntryVal is uniform, we only need to generate the first
- // lane. Otherwise, we generate all VF values.
- bool IsUniform =
- Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
+ // iteration.
+ bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
+ unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
Type *VecIVTy = nullptr;
Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
VecIVTy = VectorType::get(ScalarIVTy, State.VF);
UnitStepVec =
Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
@@ -2684,7 +2621,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
- if (!IsUniform && State.VF.isScalable()) {
+ if (!FirstLaneOnly && State.VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (ScalarIVTy->isFloatingPointTy())
@@ -4565,7 +4502,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
+ bool IsUniform = vputils::onlyFirstLaneUsed(PhiR);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
@@ -5889,7 +5826,9 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// consider interleaving beneficial (eg. MVE).
if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
return false;
- if (VF.getFixedValue() >= EpilogueVectorizationMinVF)
+ // FIXME: We should consider changing the threshold for scalable
+ // vectors to take VScaleForTuning into account.
+ if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
return true;
return false;
}
@@ -5940,29 +5879,21 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (MainLoopVF.isScalable())
- LLVM_DEBUG(
- dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
- "yet supported. Converting to fixed-width (VF="
- << FixedMainLoopVF << ") instead\n");
-
- if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
}
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
- (Result.Width.getFixedValue() == 1 ||
- isMoreProfitable(NextVF, Result)) &&
+ if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
- << Result.Width.getFixedValue() << "\n";);
+ << Result.Width << "\n";);
return Result;
}
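
A note on the two accessors these hunks swap, with a minimal sketch (not part
of the patch): getFixedValue() asserts that the count is not scalable, which
is why the profitability threshold and the debug print now go through
getKnownMinValue() and ElementCount's own stream printer instead.

    #include "llvm/Support/TypeSize.h"
    using llvm::ElementCount;

    void elementCountSketch() {
      ElementCount Fixed = ElementCount::getFixed(8);       // exactly 8 lanes
      ElementCount Scalable = ElementCount::getScalable(4); // vscale x 4 lanes
      (void)Fixed.getFixedValue();       // fine: 8
      (void)Scalable.getKnownMinValue(); // fine: 4, the known minimum
      // Scalable.getFixedValue() would assert: the count is not fixed.
    }
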
@@ -8546,16 +8477,54 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
Mask, Consecutive, Reverse);
}
-VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi,
- ArrayRef<VPValue *> Operands) const {
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipe(PHINode *Phi, Instruction *PhiOrTrunc,
+ VPValue *Start, const InductionDescriptor &IndDesc,
+ LoopVectorizationCostModel &CM, Loop &OrigLoop,
+ VFRange &Range) {
+ // Returns true if an instruction \p I should be scalarized instead of
+ // vectorized for the chosen vectorization factor.
+ auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
+ return CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF);
+ };
+
+ bool NeedsScalarIV = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ // Returns true if we should generate a scalar version of the induction.
+ if (ShouldScalarizeInstruction(PhiOrTrunc, VF))
+ return true;
+ auto isScalarInst = [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return OrigLoop.contains(I) && ShouldScalarizeInstruction(I, VF);
+ };
+ return any_of(PhiOrTrunc->users(), isScalarInst);
+ },
+ Range);
+ bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ return ShouldScalarizeInstruction(PhiOrTrunc, VF);
+ },
+ Range);
+ assert(IndDesc.getStartValue() ==
+ Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
+ if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, TruncI,
+ NeedsScalarIV, !NeedsScalarIVOnly);
+ }
+ assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, IndDesc, NeedsScalarIV,
+ !NeedsScalarIVOnly);
+}
+
+VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
+ PHINode *Phi, ArrayRef<VPValue *> Operands, VFRange &Range) const {
+
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
- if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi)) {
- assert(II->getStartValue() ==
- Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
- return new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], *II);
- }
+ if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
+ return createWidenInductionRecipe(Phi, Phi, Operands[0], *II, CM, *OrigLoop,
+ Range);
return nullptr;
}
@@ -8583,7 +8552,7 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, II, I);
+ return createWidenInductionRecipe(Phi, I, Start, II, CM, *OrigLoop, Range);
}
return nullptr;
}
@@ -8865,7 +8834,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;
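
The net effect of the LoopVectorize.cpp changes above: the per-VF choice
between a scalar and a vector induction, previously recomputed from the cost
model during codegen (needsScalarInduction), is now computed once while the
VPlan is built and stored on the recipe as NeedsScalarIV / NeedsVectorIV,
where NeedsVectorIV is the negation of "the IV itself is scalarized for every
VF in the range". A rough sketch of the helper that makes the per-VF query
range-safe, assuming the usual semantics of
LoopVectorizationPlanner::getDecisionAndClampRange:

    // Evaluates the predicate at the start of the VF range and shrinks
    // Range.End so the decision is uniform for every VF that remains;
    // callers can then bake a single boolean into a recipe for the whole
    // clamped range.
    bool getDecisionAndClampRange(const std::function<bool(ElementCount)> &P,
                                  VFRange &Range) {
      bool Decision = P(Range.Start);
      for (ElementCount VF = Range.Start * 2;
           ElementCount::isKnownLT(VF, Range.End); VF *= 2)
        if (P(VF) != Decision) {
          Range.End = VF;
          break;
        }
      return Decision;
    }
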
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101..15b349f53fd9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) {
return true;
}
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+ unsigned BaseIndex = 0);
+
+/// Checks if the provided operands of two cmp instructions are compatible:
+/// matching constants, same-opcode instructions, or plain non-instruction values.
+static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
+ Value *Op1) {
+ return (isConstant(BaseOp0) && isConstant(Op0)) ||
+ (isConstant(BaseOp1) && isConstant(Op1)) ||
+ (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
+ !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
+ getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+ getSameOpcode({BaseOp1, Op1}).getOpcode();
+}
+
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState: the opcode with which we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
- unsigned BaseIndex = 0) {
+ unsigned BaseIndex) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
+ bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
+ CmpInst::Predicate BasePred =
+ IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
@@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
continue;
}
}
+ } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
+ auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
+ auto *Inst = cast<Instruction>(VL[Cnt]);
+ Type *Ty0 = BaseInst->getOperand(0)->getType();
+ Type *Ty1 = Inst->getOperand(0)->getType();
+ if (Ty0 == Ty1) {
+ Value *BaseOp0 = BaseInst->getOperand(0);
+ Value *BaseOp1 = BaseInst->getOperand(1);
+ Value *Op0 = Inst->getOperand(0);
+ Value *Op1 = Inst->getOperand(1);
+ CmpInst::Predicate CurrentPred =
+ cast<CmpInst>(VL[Cnt])->getPredicate();
+ CmpInst::Predicate SwappedCurrentPred =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ // Check for compatible operands. If the corresponding operands are not
+ // compatible, we need to perform alternate vectorization.
+ if (InstOpcode == Opcode) {
+ if (BasePred == CurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+ continue;
+ if (BasePred == SwappedCurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+ continue;
+ if (E == 2 &&
+ (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
+ continue;
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ Value *AltOp0 = AltInst->getOperand(0);
+ Value *AltOp1 = AltInst->getOperand(1);
+ // Check if operands are compatible with alternate operands.
+ if (AltPred == CurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+ continue;
+ if (AltPred == SwappedCurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+ continue;
+ }
+ if (BaseIndex == AltIndex) {
+ assert(isValidForAlternation(Opcode) &&
+ isValidForAlternation(InstOpcode) &&
+ "Cast isn't safe for alternation, logic needs to be updated!");
+ AltIndex = Cnt;
+ continue;
+ }
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
+ AltPred == CurrentPred || AltPred == SwappedCurrentPred)
+ continue;
+ }
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
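
One C++ detail the new forward declaration of getSameOpcode depends on: the
default argument for BaseIndex moves to the declaration because a default may
be specified on only one declaration in a scope, so the definition that
follows must omit it. A minimal standalone reminder of the rule:

    // The declaration owns the default; the definition must not repeat it.
    static int scaled(int X, int Factor = 2);
    static int scaled(int X, int Factor) { return X * Factor; }
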
@@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+ // Do the analysis for each tree entry only once, otherwise the order of
+ // the same node may be considered several times, though it might not be
+ // profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
+ if (!VisitedOps.insert(OpTE).second)
+ continue;
if (!OpTE->ReuseShuffleIndices.empty() ||
(IgnoreReorder && OpTE == VectorizableTree.front().get()))
continue;
@@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
- if (VisitedOps.insert(OpTE).second)
- OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
- OpTE->UserTreeIndices.size();
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
--OrdersUses[{}];
}
@@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
- if (isa<BinaryOperator>(VL0)) {
+ auto *CI = dyn_cast<CmpInst>(VL0);
+ if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ if (!CI || all_of(VL, [](Value *V) {
+ return cast<CmpInst>(V)->isCommutative();
+ })) {
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ } else {
+ CmpInst::Predicate P0 = CI->getPredicate();
+ CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+ CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+ Value *BaseOp0 = VL0->getOperand(0);
+ Value *BaseOp1 = VL0->getOperand(1);
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ if ((P0 == CurrentPred &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+ (P0 == CurrentPredSwapped &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+ std::swap(LHS, RHS);
+ } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+ std::swap(LHS, RHS);
+ }
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
+ }
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
@@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
+ Builder.getInt1Ty(),
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTI->getCmpSelInstrCost(
+ E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+ E->getAltOp());
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have the same or swapped predicate as the main cmps,
+ // but a different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask);
@@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
@@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
+ auto *AltCI = cast<CmpInst>(E->getAltOp());
+ CmpInst::Predicate AltPred = AltCI->getPredicate();
+ unsigned AltIdx =
+ std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
+ if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
+ AltPred = CmpInst::getSwappedPredicate(AltPred);
+ V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have the same or swapped predicate as the main cmps,
+ // but a different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
@@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (ScheduleData *BundleMember = picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *pickedInst = BundleMember->Inst;
- if (pickedInst->getNextNode() != LastScheduledInst) {
- BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
- pickedInst);
- }
+ if (pickedInst->getNextNode() != LastScheduledInst)
+ pickedInst->moveBefore(LastScheduledInst);
LastScheduledInst = pickedInst;
}
@@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
- R.reorderBottomToTop();
+ R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
R.buildExternalUses();
R.computeMinimumValueSizes();
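
The cmp handling added above treats two compares as one "opcode" when their
predicates match directly or after swapping operands. A minimal sketch of the
predicate identity involved (LLVM's CmpInst API; not part of the patch):

    #include "llvm/IR/InstrTypes.h"
    #include <cassert>
    using llvm::CmpInst;

    void swappedPredicateSketch() {
      // icmp sgt %a, %b is the same comparison as icmp slt %b, %a, which is
      // why getSameOpcode and areCompatibleCmpOps also try the swapped
      // predicate before giving up on a cmp pair.
      assert(CmpInst::getSwappedPredicate(CmpInst::ICMP_SGT) ==
             CmpInst::ICMP_SLT);
    }
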
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index e5dded3c0f1e..8822c0004eb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -75,7 +75,8 @@ class VPRecipeBuilder {
/// Check if an induction recipe should be constructed for \p Phi. If so, build and
/// return it. If not, return null.
VPWidenIntOrFpInductionRecipe *
- tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands) const;
+ tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef<VPValue *> Operands,
+ VFRange &Range) const;
/// Optimize the special case where the operand of \p I is a constant integer
/// induction variable.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a96c122db2a9..342d4a074e10 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1649,3 +1649,9 @@ void VPSlotTracker::assignSlots(const VPlan &Plan) {
for (VPValue *Def : Recipe.definedValues())
assignSlot(Def);
}
+
+bool vputils::onlyFirstLaneUsed(VPValue *Def) {
+ return all_of(Def->users(), [Def](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
+ });
+}
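
vputils::onlyFirstLaneUsed is a plan-level query built from the recipe-level
overrides added below: a value needs all of its lanes only if some user
recipe does. For example, the memory-recipe override reports true just for
the address operand of a consecutive access, and the canonical-IV-phi
override reports true unconditionally, so a canonical IV whose only users are
consecutive loads and stores counts as "first lane only".
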
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 824440f98a8b..bcaabca692cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -759,6 +759,14 @@ public:
bool mayReadOrWriteMemory() const {
return mayReadFromMemory() || mayWriteToMemory();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ /// Conservatively returns false.
+ virtual bool onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return false;
+ }
};
inline bool VPUser::classof(const VPDef *Def) {
@@ -893,6 +901,24 @@ public:
/// Set the fast-math flags.
void setFastMathFlags(FastMathFlags FMFNew);
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ if (getOperand(0) != Op)
+ return false;
+ switch (getOpcode()) {
+ default:
+ return false;
+ case VPInstruction::ActiveLaneMask:
+ case VPInstruction::CanonicalIVIncrement:
+ case VPInstruction::CanonicalIVIncrementNUW:
+ case VPInstruction::BranchOnCount:
+ return true;
+ }
+ llvm_unreachable("switch should return");
+ }
};
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1027,18 +1053,24 @@ public:
class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
PHINode *IV;
const InductionDescriptor &IndDesc;
+ bool NeedsScalarIV;
+ bool NeedsVectorIV;
public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
- const InductionDescriptor &IndDesc)
+ const InductionDescriptor &IndDesc,
+ bool NeedsScalarIV, bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start,
const InductionDescriptor &IndDesc,
- TruncInst *Trunc)
+ TruncInst *Trunc, bool NeedsScalarIV,
+ bool NeedsVectorIV)
: VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this),
- IV(IV), IndDesc(IndDesc) {}
+ IV(IV), IndDesc(IndDesc), NeedsScalarIV(NeedsScalarIV),
+ NeedsVectorIV(NeedsVectorIV) {}
~VPWidenIntOrFpInductionRecipe() override = default;
@@ -1082,6 +1114,12 @@ public:
const TruncInst *TruncI = getTruncInst();
return TruncI ? TruncI->getType() : IV->getType();
}
+
+ /// Returns true if a scalar phi needs to be created for the induction.
+ bool needsScalarIV() const { return NeedsScalarIV; }
+
+ /// Returns true if a vector phi needs to be created for the induction.
+ bool needsVectorIV() const { return NeedsVectorIV; }
};
/// A pure virtual base class for all recipes modeling header phis, including
@@ -1318,6 +1356,17 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Recursing through Blend recipes only; the recursion must terminate at
+ // header phis at the latest.
+ return all_of(users(), [this](VPUser *U) {
+ return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(this);
+ });
+ }
};
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
@@ -1495,6 +1544,13 @@ public:
bool isPacked() const { return AlsoPack; }
bool isPredicated() const { return IsPredicated; }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return isUniform();
+ }
};
/// A recipe for generating conditional branches on the bits of a mask.
@@ -1651,6 +1707,16 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+
+ // Widened, consecutive memory operations only demand the first lane of
+ // their address.
+ return Op == getAddr() && isConsecutive();
+ }
};
/// Canonical scalar induction phi of the vector loop. Starting at the specified
@@ -1686,6 +1752,13 @@ public:
const Type *getScalarType() const {
return getOperand(0)->getLiveInIRValue()->getType();
}
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// A Recipe for widening the canonical induction variable of the vector loop.
@@ -2766,6 +2839,14 @@ public:
/// Return true if all visited instruction can be combined.
bool isCompletelySLP() const { return CompletelySLP; }
};
+
+namespace vputils {
+
+/// Returns true if only the first lane of \p Def is used.
+bool onlyFirstLaneUsed(VPValue *Def);
+
+} // end namespace vputils
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fb5f3d428189..70ce773a8a85 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -47,7 +47,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
- NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, *II);
+ NewRecipe =
+ new VPWidenIntOrFpInductionRecipe(Phi, Start, *II, false, true);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -341,10 +342,16 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
- // If the induction recipe is canonical and the types match, use it
- // directly.
- if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
- WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+ if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
+ WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
+ continue;
+
+ // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
+ // everything WidenNewIV's users need: that is, if WidenOriginalIV will
+ // generate a vector phi, or if all users of WidenNewIV demand only the
+ // first lane.
+ if (WidenOriginalIV->needsVectorIV() ||
+ vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 0296a995ad29..010ca28fc237 100644
--- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index f7b29b884027..8842162f5216 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -90,6 +90,7 @@ OPTIONS:
--rsp-quoting - quoting style for response files
=posix - posix
=windows - windows
+ --thin - create a thin archive
--version - print the version and exit
@<file> - read options from <file>
@@ -118,7 +119,7 @@ MODIFIERS:
[P] - use full names when matching (implied for thin archives)
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
- [T] - create a thin archive
+ [T] - deprecated, use --thin instead
[u] - update only [files] newer than archive contents
[U] - use actual timestamps and uids/gids
[v] - be verbose about actions taken
@@ -390,8 +391,6 @@ static ArchiveOperation parseCommandLine() {
break;
case 'T':
Thin = true;
- // Thin archives store path names, so P should be forced.
- CompareFullPath = true;
break;
case 'L':
AddLibrary = true;
@@ -407,6 +406,10 @@ static ArchiveOperation parseCommandLine() {
}
}
+ // Thin archives store path names, so P should be forced.
+ if (Thin)
+ CompareFullPath = true;
+
// At this point, the next thing on the command line must be
// the archive name.
getArchive();
@@ -965,6 +968,8 @@ static void createSymbolTable(object::Archive *OldArchive) {
if (OldArchive->hasSymbolTable())
return;
+ if (OldArchive->isThin())
+ Thin = true;
performWriteOperation(CreateSymTab, OldArchive, nullptr, nullptr);
}
@@ -1202,6 +1207,11 @@ static int ar_main(int argc, char **argv) {
continue;
}
+ if (strcmp(*ArgIt, "--thin") == 0) {
+ Thin = true;
+ continue;
+ }
+
Match = matchFlagWithArg("format", ArgIt, Argv);
if (Match) {
FormatType = StringSwitch<Format>(Match)
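
The --thin flag added above replaces the T modifier without changing
behavior. Assuming the usual llvm-ar invocation shape, creating a thin
archive now reads:

    llvm-ar --thin rcs libfoo.a foo.o    # new spelling
    llvm-ar rcsT libfoo.a foo.o          # deprecated, still accepted

Both force full-path name matching (P), since thin archives store path names;
the hunk above moves that forcing after the option loop so it applies however
Thin was set.
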
diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index 307a7f9b7999..11dad0d9c369 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -106,7 +106,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
else
// Otherwise, with an empty Module but non-empty Index, we write a
// combined index.
- WriteIndexToFile(*IndexToWrite, Out->os());
+ writeIndexToFile(*IndexToWrite, Out->os());
}
// Declare success.
diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp
index cb1c4116ff19..3cdef529504e 100644
--- a/llvm/tools/llvm-extract/llvm-extract.cpp
+++ b/llvm/tools/llvm-extract/llvm-extract.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index d78c4dff7db4..8fc3a5d68500 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -497,7 +497,7 @@ static void createCombinedModuleSummaryIndex() {
raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + ".thinlto.bc'");
- WriteIndexToFile(CombinedIndex, OS);
+ writeIndexToFile(CombinedIndex, OS);
OS.close();
}
@@ -660,7 +660,7 @@ private:
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputFilename + "'");
- WriteIndexToFile(*CombinedIndex, OS);
+ writeIndexToFile(*CombinedIndex, OS);
}
/// Load the combined index from disk, then compute and generate
@@ -698,7 +698,7 @@ private:
std::error_code EC;
raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
- WriteIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
+ writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
}
}
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 3cac77411845..6b731abd9ed9 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -417,6 +417,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
case MachO::LC_SUB_UMBRELLA:
case MachO::LC_SUB_CLIENT:
case MachO::LC_SUB_LIBRARY:
+ case MachO::LC_LINKER_OPTION:
// Nothing to update.
break;
default:
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 5e58c1365d80..6000460d3c23 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -255,9 +255,7 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
auto Reader = std::move(ReaderOrErr.get());
- bool IsIRProfile = Reader->isIRLevelProfile();
- bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
- if (Error E = WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
+ if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
consumeError(std::move(E));
WC->Errors.emplace_back(
make_error<StringError>(
@@ -266,7 +264,6 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
Filename);
return;
}
- WC->Writer.setInstrEntryBBEnabled(Reader->instrEntryBBEnabled());
for (auto &I : *Reader) {
if (Remapper)
@@ -2095,7 +2092,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
bool ShowAllFunctions, bool ShowCS,
uint64_t ValueCutoff, bool OnlyListBelow,
const std::string &ShowFunction, bool TextFormat,
- bool ShowBinaryIds, raw_fd_ostream &OS) {
+ bool ShowBinaryIds, bool ShowCovered,
+ raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -2152,6 +2150,13 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
assert(Func.Counts.size() > 0 && "function missing entry counter");
Builder.addRecord(Func);
+ if (ShowCovered) {
+ if (std::any_of(Func.Counts.begin(), Func.Counts.end(),
+ [](uint64_t C) { return C; }))
+ OS << Func.Name << "\n";
+ continue;
+ }
+
uint64_t FuncMax = 0;
uint64_t FuncSum = 0;
for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
@@ -2228,7 +2233,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (Reader->hasError())
exitWithError(Reader->getError(), Filename);
- if (TextFormat)
+ if (TextFormat || ShowCovered)
return 0;
std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
bool IsIR = Reader->isIRLevelProfile();
@@ -2579,6 +2584,9 @@ static int show_main(int argc, const char *argv[]) {
"debug-info", cl::init(""),
cl::desc("Read and extract profile metadata from debug info and show "
"the functions it found."));
+ cl::opt<bool> ShowCovered(
+ "covered", cl::init(false),
+ cl::desc("Show only the functions that have been executed."));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
@@ -2610,7 +2618,7 @@ static int show_main(int argc, const char *argv[]) {
Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
- TextFormat, ShowBinaryIds, OS);
+ TextFormat, ShowBinaryIds, ShowCovered, OS);
if (ProfileKind == sample)
return showSampleProfile(Filename, ShowCounts, TopNFunctions,
ShowAllFunctions, ShowDetailedSummary,
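
With the new flag wired through showInstrProfile, a hedged usage sketch:
`llvm-profdata show --covered foo.profdata` prints only the names of
functions with at least one nonzero counter, one per line, and suppresses the
usual summary output via the early return taken when ShowCovered is set.
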
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index cfb618117d2b..04a67225401f 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -6393,6 +6393,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_MACH));
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
unsigned(ELF::EF_AMDGPU_MACH),
diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp
index d76332d1ba36..b4d726016437 100644
--- a/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -183,7 +183,10 @@ void WasmDumper::printSectionHeaders() {
W.printNumber("Offset", Seg.Offset.Value.Int32);
else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_I64_CONST)
W.printNumber("Offset", Seg.Offset.Value.Int64);
- else
+ else if (Seg.Offset.Opcode == wasm::WASM_OPCODE_GLOBAL_GET) {
+ ListScope Group(W, "Offset");
+ W.printNumber("Global", Seg.Offset.Value.Global);
+ } else
llvm_unreachable("unknown init expr opcode");
}
break;
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index 941b529da9b2..9135d60fdf92 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -34,14 +34,15 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 4de619df5b5f..a1f8f4809d5f 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -4645,39 +4645,33 @@ static void GenerateVariantsOf(TreePatternNodePtr N,
// If this node is commutative, consider the commuted order.
bool isCommIntrinsic = N->isCommutativeIntrinsic(CDP);
if (NodeInfo.hasProperty(SDNPCommutative) || isCommIntrinsic) {
- assert((N->getNumChildren()>=2 || isCommIntrinsic) &&
+ unsigned Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id.
+ assert(N->getNumChildren() >= (2 + Skip) &&
"Commutative but doesn't have 2 children!");
- // Don't count children which are actually register references.
- unsigned NC = 0;
- for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+ // Don't allow commuting children which are actually register references.
+ bool NoRegisters = true;
+ unsigned i = 0 + Skip;
+ unsigned e = 2 + Skip;
+ for (; i != e; ++i) {
TreePatternNode *Child = N->getChild(i);
if (Child->isLeaf())
if (DefInit *DI = dyn_cast<DefInit>(Child->getLeafValue())) {
Record *RR = DI->getDef();
if (RR->isSubClassOf("Register"))
- continue;
+ NoRegisters = false;
}
- NC++;
}
// Consider the commuted order.
- if (isCommIntrinsic) {
- // Commutative intrinsic. First operand is the intrinsic id, 2nd and 3rd
- // operands are the commutative operands, and there might be more operands
- // after those.
- assert(NC >= 3 &&
- "Commutative intrinsic should have at least 3 children!");
- std::vector<std::vector<TreePatternNodePtr>> Variants;
- Variants.push_back(std::move(ChildVariants[0])); // Intrinsic id.
- Variants.push_back(std::move(ChildVariants[2]));
- Variants.push_back(std::move(ChildVariants[1]));
- for (unsigned i = 3; i != NC; ++i)
- Variants.push_back(std::move(ChildVariants[i]));
- CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
- } else if (NC == N->getNumChildren()) {
+ if (NoRegisters) {
std::vector<std::vector<TreePatternNodePtr>> Variants;
- Variants.push_back(std::move(ChildVariants[1]));
- Variants.push_back(std::move(ChildVariants[0]));
- for (unsigned i = 2; i != NC; ++i)
+ unsigned i = 0;
+ if (isCommIntrinsic)
+ Variants.push_back(std::move(ChildVariants[i++])); // Intrinsic id.
+ Variants.push_back(std::move(ChildVariants[i + 1]));
+ Variants.push_back(std::move(ChildVariants[i]));
+ i += 2;
+ // Remaining operands are not commuted.
+ for (; i != N->getNumChildren(); ++i)
Variants.push_back(std::move(ChildVariants[i]));
CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
}
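
A worked example of what the rewritten loop produces, under the convention in
the code above that a commutative intrinsic's operand 0 is the intrinsic id:
for child variants {Id, A, B, C} the commuted set is {Id, B, A, C}; for a
plain commutative node {A, B, C} it is {B, A, C}. If either of the two
commutable children is a leaf register reference, NoRegisters is false and no
commuted variant is emitted; this replaces the old NC counting, which
inspected every child rather than just the two being commuted.
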
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 7c1c37f7b370..e47bda725a17 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -521,6 +521,15 @@ void CodeGenSchedModels::collectProcModels() {
RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
llvm::sort(ProcRecords, LessRecordFieldName());
+ // Check for duplicated names.
+ auto I = std::adjacent_find(ProcRecords.begin(), ProcRecords.end(),
+ [](const Record *Rec1, const Record *Rec2) {
+ return Rec1->getValueAsString("Name") == Rec2->getValueAsString("Name");
+ });
+ if (I != ProcRecords.end())
+ PrintFatalError((*I)->getLoc(), "Duplicate processor name " +
+ (*I)->getValueAsString("Name"));
+
// Reserve space because we can. Reallocation would be ok.
ProcModels.reserve(ProcRecords.size()+1);
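
The duplicate check relies on ProcRecords having just been sorted by the Name
field: std::adjacent_find only compares neighbours, so the preceding sort is
what makes it a complete duplicate detector. A minimal standalone sketch:

    #include <algorithm>
    #include <string>
    #include <vector>

    bool hasDuplicate(std::vector<std::string> Names) {
      std::sort(Names.begin(), Names.end());
      // After sorting, any duplicates are adjacent.
      return std::adjacent_find(Names.begin(), Names.end()) != Names.end();
    }
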
@@ -1973,7 +1982,6 @@ void CodeGenSchedModels::collectProcResources() {
void CodeGenSchedModels::checkCompleteness() {
bool Complete = true;
- bool HadCompleteModel = false;
for (const CodeGenProcModel &ProcModel : procModels()) {
const bool HasItineraries = ProcModel.hasItineraries();
if (!ProcModel.ModelDef->getValueAsBit("CompleteModel"))
@@ -1985,7 +1993,7 @@ void CodeGenSchedModels::checkCompleteness() {
continue;
unsigned SCIdx = getSchedClassIdx(*Inst);
if (!SCIdx) {
- if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) {
+ if (Inst->TheDef->isValueUnset("SchedRW")) {
PrintError(Inst->TheDef->getLoc(),
"No schedule information for instruction '" +
Inst->TheDef->getName() + "' in SchedMachineModel '" +
@@ -2013,7 +2021,6 @@ void CodeGenSchedModels::checkCompleteness() {
Complete = false;
}
}
- HadCompleteModel = true;
}
if (!Complete) {
errs() << "\n\nIncomplete schedule models found.\n"
diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index 94ad6ee285d4..1fd85939e74e 100644
--- a/llvm/utils/TableGen/CompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -72,7 +72,6 @@
#include "CodeGenTarget.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp
index 63a9ed682d4f..0dea1ef00e4b 100644
--- a/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -11,21 +11,21 @@
///
//===----------------------------------------------------------------------===//
+#include "CodeGenTarget.h"
+#include "GlobalISel/CodeExpander.h"
+#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/GIMatchDag.h"
+#include "GlobalISel/GIMatchDagPredicate.h"
+#include "GlobalISel/GIMatchTree.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenTarget.h"
-#include "GlobalISel/CodeExpander.h"
-#include "GlobalISel/CodeExpansions.h"
-#include "GlobalISel/GIMatchDag.h"
-#include "GlobalISel/GIMatchTree.h"
#include <cstdint>
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
index 3ebb293f466e..42b4aabf2755 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.cpp
@@ -12,7 +12,6 @@
#include "CodeExpander.h"
#include "CodeExpansions.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.h b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
index bd6946de5925..1291eb1ad940 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.h
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
@@ -15,10 +15,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/SMLoc.h"
namespace llvm {
class CodeExpansions;
+class SMLoc;
class raw_ostream;
/// Emit the given code with all '${foo}' placeholders expanded to their
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
index 37570648cad1..4c3c610aff74 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
@@ -16,7 +16,6 @@
#include "GIMatchDagPredicateDependencyEdge.h"
namespace llvm {
-class GIMatchDag;
/// This class manages lifetimes for data associated with the GIMatchDag object.
class GIMatchDagContext {
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
index e59cb3aae49a..796479467df7 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp
@@ -8,6 +8,7 @@
#include "GIMatchDagEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
index 0c39b50442b4..5e60448b30c1 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
@@ -9,11 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGINSTR_H
-#include "GIMatchDagOperands.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
class GIMatchDag;
+class GIMatchDagOperandList;
/// Represents an instruction in the match DAG. This object knows very little
/// about the actual instruction to be matched as the bulk of that is in
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
index 1aca2f9dc135..6a9e33ac515e 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp
@@ -10,8 +10,8 @@
#include "llvm/TableGen/Record.h"
-#include "GIMatchDagOperands.h"
#include "../CodeGenInstruction.h"
+#include "GIMatchDag.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
index 9b030d6edb13..08e541b76a5a 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
@@ -9,8 +9,12 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATE_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "GIMatchDag.h"
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+#include "llvm/Support/raw_ostream.h"
+#endif
namespace llvm {
class CodeExpansions;
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
index 2e804de1cd4e..921cbaf9c408 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp
@@ -9,6 +9,7 @@
#include "GIMatchDagPredicateDependencyEdge.h"
#include "GIMatchDagInstr.h"
+#include "GIMatchDagOperands.h"
#include "GIMatchDagPredicate.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
index 9552adc5c625..af91afc6073d 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h
@@ -9,12 +9,14 @@
#ifndef LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
#define LLVM_UTILS_TABLEGEN_GIMATCHDAGPREDICATEEDGE_H
-#include "GIMatchDagOperands.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
-class GIMatchDag;
class GIMatchDagInstr;
class GIMatchDagPredicate;
+class GIMatchDagOperand;
+
+class raw_ostream;
/// Represents a dependency that must be met to evaluate a predicate.
///
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
index 00d57404b069..42055ad4f608 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "GIMatchTree.h"
+#include "GIMatchDagPredicate.h"
#include "../CodeGenInstruction.h"
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 25bc0adc2a81..018aa7ee2f71 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -32,7 +32,6 @@
#include "CodeGenDAGPatterns.h"
#include "SubtargetFeatureInfo.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
@@ -668,7 +667,6 @@ MatchTable &operator<<(MatchTable &Table, const MatchTableRecord &Value) {
class OperandMatcher;
class MatchAction;
class PredicateMatcher;
-class RuleMatcher;
class Matcher {
public:
diff --git a/llvm/utils/TableGen/InfoByHwMode.cpp b/llvm/utils/TableGen/InfoByHwMode.cpp
index 3d236b828032..73c4fbf0a5eb 100644
--- a/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <set>
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/InfoByHwMode.h b/llvm/utils/TableGen/InfoByHwMode.h
index c97add687ca2..44927d0bf0df 100644
--- a/llvm/utils/TableGen/InfoByHwMode.h
+++ b/llvm/utils/TableGen/InfoByHwMode.h
@@ -20,11 +20,9 @@
#include <map>
#include <string>
-#include <vector>
namespace llvm {
-struct CodeGenHwModes;
class Record;
class raw_ostream;
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index f4e5eb59cb80..a5aa4069e60f 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -18,7 +18,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index 0809432dfd0d..d54132f3190b 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
#include <cstring>
#include <map>
#include <memory>
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 5e44d033109a..11d896229f5b 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -8,15 +8,8 @@
#include "OptEmitter.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <cctype>
-#include <cstring>
-#include <map>
using namespace llvm;
diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h
index 9e7a4a3925ac..27f049a715aa 100644
--- a/llvm/utils/TableGen/PredicateExpander.h
+++ b/llvm/utils/TableGen/PredicateExpander.h
@@ -17,12 +17,12 @@
#define LLVM_UTILS_TABLEGEN_PREDICATEEXPANDER_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Record.h"
+#include <vector>
namespace llvm {
class raw_ostream;
+class Record;
class PredicateExpander {
bool EmitCallsByRef;
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index 61f71309b6fb..d97d7acb87a7 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -17,7 +17,6 @@
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenHwModes.h"
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 327b90d59db6..dc5c96c662be 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -16,9 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
@@ -32,8 +29,6 @@ using namespace llvm;
namespace {
-struct GenericTable;
-
int getAsInt(Init *B) {
return cast<IntInit>(B->convertInitializerTo(IntRecTy::get()))->getValue();
}
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 24c11c8bc831..2d4a45f889be 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -289,7 +289,8 @@ int main(int argc, char **argv) {
#define __has_feature(x) 0
#endif
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) || \
+#if __has_feature(address_sanitizer) || \
+ (defined(__SANITIZE_ADDRESS__) && defined(__GNUC__)) || \
__has_feature(leak_sanitizer)
#include <sanitizer/lsan_interface.h>
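
The sanitizer guard gets stricter here: Clang advertises ASan through __has_feature(address_sanitizer), while GCC defines __SANITIZE_ADDRESS__. Requiring __GNUC__ alongside the macro presumably keeps other compilers that also define __SANITIZE_ADDRESS__ (MSVC's /fsanitize=address does, without shipping <sanitizer/lsan_interface.h>) off this include path. The detection idiom in isolation:

#include <cstdio>

// Pre-Clang compilers have no __has_feature, so default it to 0.
#ifndef __has_feature
#define __has_feature(x) 0
#endif

// Clang path: __has_feature. GCC path: __SANITIZE_ADDRESS__, but only
// when __GNUC__ confirms a GCC-compatible front end.
#if __has_feature(address_sanitizer) || \
    (defined(__SANITIZE_ADDRESS__) && defined(__GNUC__))
#define BUILDING_WITH_ASAN 1
#else
#define BUILDING_WITH_ASAN 0
#endif

int main() {
  std::printf("ASan detected: %d\n", BUILDING_WITH_ASAN);
  return 0;
}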
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 7518b262e6e9..74969053f095 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -14,6 +14,9 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyDisassemblerEmitter.h"
+#include "CodeGenInstruction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
namespace llvm {
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index 60d3d9433eca..aba3a4bfd302 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -14,12 +14,13 @@
#ifndef LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
#define LLVM_UTILS_TABLEGEN_WEBASSEMBLYDISASSEMBLEREMITTER_H
-#include "CodeGenInstruction.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class CodeGenInstruction;
+class raw_ostream;
+
void emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
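
This header/impl split is instructive: the declaration of emitWebAssemblyDisassemblerTables traffics only in a reference and in pointers (ArrayRef<const CodeGenInstruction *>), so forward declarations of CodeGenInstruction and raw_ostream suffice, and the full includes migrate to the .cpp hunk above, where the bodies live. A sketch of why pointer-element containers tolerate incomplete types, with std::vector standing in for ArrayRef and hypothetical names throughout:

#include <vector>

class Instruction; // incomplete: never dereferenced in the "header" half

// A pointer to an incomplete type is itself a complete type, so the
// container and this declaration need nothing more than the name.
void emitTables(const std::vector<const Instruction *> &Instrs);

class Instruction {}; // ".cpp" half completes the type

void emitTables(const std::vector<const Instruction *> &Instrs) {
  (void)Instrs.size(); // real code would dereference the elements here
}

int main() {
  Instruction I;
  std::vector<const Instruction *> V{&I};
  emitTables(V);
  return 0;
}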
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 90e71a354d17..81ddea99740d 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -15,9 +15,12 @@
#include "X86DisassemblerTables.h"
#include "X86DisassemblerShared.h"
-#include "llvm/ADT/STLExtras.h"
+#include "X86ModRMFilters.h"
+#include "llvm/ADT/STLArrayExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
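
Note the swap from STLExtras.h to the new, much smaller STLArrayExtras.h (35 lines per the diffstat), which this commit splits out so that users of the array helpers stop pulling in all of STLExtras.h; llvm::array_lengthof is the typical client. A standalone copy of the helper's shape, for illustration only:

#include <cstddef>

// Same shape as llvm::array_lengthof: deduce a C array's extent at
// compile time from the reference-to-array parameter type.
template <typename T, std::size_t N>
constexpr std::size_t array_lengthof(T (&)[N]) {
  return N;
}

static const char *Names[] = {"MOVDQA", "MOVAPS", "MOVAPD"};
static_assert(array_lengthof(Names) == 3, "extent comes from the type");

int main() { return static_cast<int>(array_lengthof(Names)) - 3; }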
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h
index 2e4ff1e2ce08..966f7406efec 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.h
+++ b/llvm/utils/TableGen/X86DisassemblerTables.h
@@ -17,15 +17,18 @@
#define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H
#include "X86DisassemblerShared.h"
-#include "X86ModRMFilters.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
#include <map>
+#include <memory>
#include <vector>
namespace llvm {
+class raw_ostream;
namespace X86Disassembler {
+class ModRMFilter;
+
/// DisassemblerTables - Encapsulates all the decode tables being generated by
/// the table emitter. Contains functions to populate the tables as well as
/// to emit them as hierarchical C structures suitable for consumption by the
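
Two details in this hunk generalize. First, the forward declaration sits inside the nested X86Disassembler namespace, because a declaration must appear in the namespace the name actually lives in. Second, <memory> arrives alongside it; a plausible reason the two travel together (hedged, since the diff does not show the affected members) is that std::unique_ptr<T> tolerates an incomplete T as long as the owner's destructor is defined where T is complete. A sketch of that pattern with hypothetical names:

#include <memory>

namespace demo {
class Filter; // incomplete here, like ModRMFilter in the header above

class Tables {
  std::unique_ptr<Filter> F; // legal with an incomplete Filter
public:
  Tables();
  ~Tables(); // declared only; defining it here would need ~Filter
};
} // namespace demo

// ".cpp" half: complete the type, then define the special members.
namespace demo {
class Filter {};
Tables::Tables() : F(new Filter) {}
Tables::~Tables() = default; // Filter is complete, so deletion is safe
} // namespace demo

int main() {
  demo::Tables T;
  return 0;
}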
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 0a8d0750cf13..2a29331eb7e8 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -40,8 +40,6 @@ struct ManualMapEntry {
: RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
};
-class IsMatch;
-
// List of instructions requiring explicitly aligned memory.
const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",
"MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a9b384155965..4023d8f57318 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -15,8 +15,10 @@
#include "X86RecognizableInstr.h"
#include "X86DisassemblerShared.h"
+#include "X86DisassemblerTables.h"
#include "X86ModRMFilters.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
#include <string>
using namespace llvm;
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index d4fad2cc3f0f..8f557d9ee5f5 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -16,13 +16,16 @@
#ifndef LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
#define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
-#include "CodeGenTarget.h"
-#include "X86DisassemblerTables.h"
+#include "CodeGenInstruction.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+
+struct InstructionSpecifier;
namespace llvm {
+class Record;
+
#define X86_INSTR_MRM_MAPPING \
MAP(C0, 64) \
MAP(C1, 65) \
@@ -153,6 +156,8 @@ namespace X86Local {
namespace X86Disassembler {
+class DisassemblerTables;
+
/// RecognizableInstr - Encapsulates all information required to decode a single
/// instruction, as extracted from the LLVM instruction tables. Has methods
/// to interpret the information available in the LLVM tables, and to emit the