Diffstat (limited to 'include/llvm/Analysis')
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h            |   2
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h          |   5
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h          |   4
-rw-r--r--  include/llvm/Analysis/CFG.h                      |   2
-rw-r--r--  include/llvm/Analysis/CFLAndersAliasAnalysis.h   |   5
-rw-r--r--  include/llvm/Analysis/CFLSteensAliasAnalysis.h   |   5
-rw-r--r--  include/llvm/Analysis/CGSCCPassManager.h         |  31
-rw-r--r--  include/llvm/Analysis/CaptureTracking.h          |   6
-rw-r--r--  include/llvm/Analysis/DDG.h                      | 430
-rw-r--r--  include/llvm/Analysis/DOTGraphTraitsPass.h       |   4
-rw-r--r--  include/llvm/Analysis/DependenceGraphBuilder.h   | 119
-rw-r--r--  include/llvm/Analysis/DivergenceAnalysis.h       |  16
-rw-r--r--  include/llvm/Analysis/GlobalsModRef.h            |  12
-rw-r--r--  include/llvm/Analysis/InstructionSimplify.h      |  36
-rw-r--r--  include/llvm/Analysis/LazyCallGraph.h            |  10
-rw-r--r--  include/llvm/Analysis/LegacyDivergenceAnalysis.h |  16
-rw-r--r--  include/llvm/Analysis/Loads.h                    |  22
-rw-r--r--  include/llvm/Analysis/LoopAnalysisManager.h      |  10
-rw-r--r--  include/llvm/Analysis/LoopCacheAnalysis.h        | 281
-rw-r--r--  include/llvm/Analysis/LoopInfo.h                 |  37
-rw-r--r--  include/llvm/Analysis/LoopInfoImpl.h             |   8
-rw-r--r--  include/llvm/Analysis/MemoryBuiltins.h           |  26
-rw-r--r--  include/llvm/Analysis/MemoryDependenceAnalysis.h |  14
-rw-r--r--  include/llvm/Analysis/MemorySSA.h                |   4
-rw-r--r--  include/llvm/Analysis/MemorySSAUpdater.h         |   3
-rw-r--r--  include/llvm/Analysis/MustExecute.h              | 285
-rw-r--r--  include/llvm/Analysis/Passes.h                   |   7
-rw-r--r--  include/llvm/Analysis/ProfileSummaryInfo.h       |  23
-rw-r--r--  include/llvm/Analysis/RegionInfoImpl.h           |   2
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h          |   6
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h  |  22
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.h        |  17
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h      | 180
-rw-r--r--  include/llvm/Analysis/TargetTransformInfoImpl.h  |  55
-rw-r--r--  include/llvm/Analysis/TypeMetadataUtils.h        |   2
-rw-r--r--  include/llvm/Analysis/Utils/Local.h              |  22
-rw-r--r--  include/llvm/Analysis/ValueTracking.h            |  67
-rw-r--r--  include/llvm/Analysis/VectorUtils.h              | 144
38 files changed, 1732 insertions(+), 208 deletions(-)
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 948341554f23..282142f51bb3 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
/// A pointer to the AAResults object that this AAResult is
/// aggregated within. May be null if not aggregated.
- AAResults *AAR;
+ AAResults *AAR = nullptr;
/// Helper to dispatch calls back through the derived type.
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 34a509b7f4bb..187317e3831b 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -87,10 +87,11 @@ class AliasSet : public ilist_node<AliasSet> {
AAInfo = NewAAInfo;
else {
AAMDNodes Intersection(AAInfo.intersect(NewAAInfo));
- if (!Intersection) {
+ if (!Intersection.TBAA || !Intersection.Scope ||
+ !Intersection.NoAlias) {
// NewAAInfo conflicts with AAInfo.
AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey();
- return SizeChanged;
+ SizeChanged = true;
}
AAInfo = Intersection;
}
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index b42846472f2e..0efbd59023d6 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -73,8 +73,8 @@ class AssumptionCache {
/// Get the vector of assumptions which affect a value from the cache.
SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V);
- /// Copy affected values in the cache for OV to be affected values for NV.
- void copyAffectedValuesInCache(Value *OV, Value *NV);
+ /// Move affected values in the cache for OV to be affected values for NV.
+ void transferAffectedValuesInCache(Value *OV, Value *NV);
/// Flag tracking whether we have scanned the function yet.
///
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index bb55e76ac86a..68f137ba622c 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
///
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
+bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
+ bool AllowIdenticalEdges = false);
/// Determine whether instruction 'To' is reachable from 'From', without passing
/// through any blocks in ExclusionSet, returning true if uncertain.
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 7c8b42b1d8d2..5f5e52af3d88 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> {
class FunctionInfo;
public:
- explicit CFLAndersAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLAndersAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
CFLAndersAAResult(CFLAndersAAResult &&RHS);
~CFLAndersAAResult();
@@ -74,7 +75,7 @@ private:
/// Build summary for a given function
FunctionInfo buildInfoFrom(const Function &);
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index cc7a47cd9a5f..135321616b7c 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> {
class FunctionInfo;
public:
- explicit CFLSteensAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI);
CFLSteensAAResult(CFLSteensAAResult &&Arg);
~CFLSteensAAResult();
@@ -90,7 +91,7 @@ public:
}
private:
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 8af5fb86995a..933f2210dafc 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -88,6 +88,7 @@
#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
@@ -583,10 +584,12 @@ public:
SmallVectorImpl<WeakTrackingVH> &CallHandles) {
assert(CallHandles.empty() && "Must start with a clear set of handles.");
- SmallVector<CallCount, 4> CallCounts;
+ SmallDenseMap<Function *, CallCount> CallCounts;
+ CallCount CountLocal = {0, 0};
for (LazyCallGraph::Node &N : C) {
- CallCounts.push_back({0, 0});
- CallCount &Count = CallCounts.back();
+ CallCount &Count =
+ CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
+ .first->second;
for (Instruction &I : instructions(N.getFunction()))
if (auto CS = CallSite(&I)) {
if (CS.getCalledFunction()) {
@@ -626,8 +629,6 @@ public:
// Check that we didn't miss any update scenario.
assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert((int)CallCounts.size() == C->size() &&
- "Cannot have changed the size of the SCC!");
// Check whether any of the handles were devirtualized.
auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
@@ -642,7 +643,7 @@ public:
if (!F)
return false;
- LLVM_DEBUG(dbgs() << "Found devirutalized call from "
+ LLVM_DEBUG(dbgs() << "Found devirtualized call from "
<< CS.getParent()->getParent()->getName() << " to "
<< F->getName() << "\n");
@@ -664,12 +665,20 @@ public:
// manner of transformations such as DCE and other things, but seems to
// work well in practice.
if (!Devirt)
- for (int i = 0, Size = C->size(); i < Size; ++i)
- if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
- CallCounts[i].Direct < NewCallCounts[i].Direct) {
- Devirt = true;
- break;
+ // Iterate over the keys in NewCallCounts; if the function also exists
+ // in CallCounts, do the comparison below.
+ for (auto &Pair : NewCallCounts) {
+ auto &CallCountNew = Pair.second;
+ auto CountIt = CallCounts.find(Pair.first);
+ if (CountIt != CallCounts.end()) {
+ const auto &CallCountOld = CountIt->second;
+ if (CallCountOld.Indirect > CallCountNew.Indirect &&
+ CallCountOld.Direct < CallCountNew.Direct) {
+ Devirt = true;
+ break;
+ }
}
+ }
if (!Devirt) {
PA.intersect(std::move(PassPA));
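A minimal sketch (outside the patch) of the heuristic this hunk moves to: keying the before/after call counts by function rather than by position in the SCC keeps the comparison meaningful when a pass adds or removes functions. The types and names below are illustrative stand-ins, not the LLVM API.

  #include <map>

  struct CallCount {
    unsigned Direct;
    unsigned Indirect;
  };

  // Devirtualization signature: a function that survived the pass lost
  // indirect calls and gained direct ones.
  bool sawDevirtualization(const std::map<const void *, CallCount> &Old,
                           const std::map<const void *, CallCount> &New) {
    for (const auto &Pair : New) {
      auto It = Old.find(Pair.first);
      if (It == Old.end())
        continue; // Function is new; there is nothing to compare against.
      if (It->second.Indirect > Pair.second.Indirect &&
          It->second.Direct < Pair.second.Direct)
        return true;
    }
    return false;
  }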
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index ca7abd34fea2..29921a51d5be 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -17,6 +17,7 @@ namespace llvm {
class Value;
class Use;
+ class DataLayout;
class Instruction;
class DominatorTree;
class OrderedBasicBlock;
@@ -83,6 +84,11 @@ namespace llvm {
/// use U. Return true to stop the traversal or false to continue looking
/// for more capturing instructions.
virtual bool captured(const Use *U) = 0;
+
+ /// isDereferenceableOrNull - Overload to allow clients with additional
+ /// knowledge about pointer dereferenceability to provide it and thereby
+ /// avoid conservative responses when a pointer is compared to null.
+ virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL);
};
/// PointerMayBeCaptured - Visit the value and the values derived from it and
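A hedged sketch of a client-side tracker using the new hook above: a tracker with extra knowledge that its pointers are dereferenceable can return true here, so comparisons against null need not be treated conservatively as captures. This is an illustration, not code from the patch.

  #include "llvm/Analysis/CaptureTracking.h"

  using namespace llvm;

  namespace {
  struct DerefAwareTracker : public CaptureTracker {
    bool Captured = false;

    void tooManyUses() override { Captured = true; }

    bool captured(const Use *U) override {
      Captured = true;
      return true; // Stop the traversal at the first capturing use.
    }

    // Assumed client invariant: every pointer we ask about is known
    // dereferenceable, so a null comparison cannot observe the address.
    bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
      return true;
    }
  };
  } // namespace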
diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h
new file mode 100644
index 000000000000..0e1eb9d2cda3
--- /dev/null
+++ b/include/llvm/Analysis/DDG.h
@@ -0,0 +1,430 @@
+//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Data-Dependence Graph (DDG).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DDG_H
+#define LLVM_ANALYSIS_DDG_H
+
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/Instructions.h"
+#include <unordered_map>
+
+namespace llvm {
+class DDGNode;
+class DDGEdge;
+using DDGNodeBase = DGNode<DDGNode, DDGEdge>;
+using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>;
+using DDGBase = DirectedGraph<DDGNode, DDGEdge>;
+class LPMUpdater;
+
+/// Data Dependence Graph Node
+/// The graph can represent the following types of nodes:
+/// 1. Single instruction node containing just one instruction.
+/// 2. Multiple instruction node where two or more instructions from
+/// the same basic block are merged into one node.
+/// 3. Root node is a special node that connects to all components such that
+/// there is always a path from it to any node in the graph.
+class DDGNode : public DDGNodeBase {
+public:
+ using InstructionListType = SmallVectorImpl<Instruction *>;
+
+ enum class NodeKind {
+ Unknown,
+ SingleInstruction,
+ MultiInstruction,
+ Root,
+ };
+
+ DDGNode() = delete;
+ DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {}
+ DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+ DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
+ virtual ~DDGNode() = 0;
+
+ DDGNode &operator=(const DDGNode &N) {
+ DGNode::operator=(N);
+ Kind = N.Kind;
+ return *this;
+ }
+
+ DDGNode &operator=(DDGNode &&N) {
+ DGNode::operator=(std::move(N));
+ Kind = N.Kind;
+ return *this;
+ }
+
+ /// Getter for the kind of this node.
+ NodeKind getKind() const { return Kind; }
+
+ /// Collect a list of instructions, in \p IList, for which predicate \p Pred
+ /// evaluates to true when iterating over instructions of this node. Return
+ /// true if at least one instruction was collected, and false otherwise.
+ bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred,
+ InstructionListType &IList) const;
+
+protected:
+ /// Setter for the kind of this node.
+ void setKind(NodeKind K) { Kind = K; }
+
+private:
+ NodeKind Kind;
+};
+
+/// Subclass of DDGNode representing the root node of the graph.
+/// There should only be one such node in a given graph.
+class RootDDGNode : public DDGNode {
+public:
+ RootDDGNode() : DDGNode(NodeKind::Root) {}
+ RootDDGNode(const RootDDGNode &N) = delete;
+ RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
+ ~RootDDGNode() {}
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::Root;
+ }
+ static bool classof(const RootDDGNode *N) { return true; }
+};
+
+/// Subclass of DDGNode representing single or multi-instruction nodes.
+class SimpleDDGNode : public DDGNode {
+public:
+ SimpleDDGNode() = delete;
+ SimpleDDGNode(Instruction &I);
+ SimpleDDGNode(const SimpleDDGNode &N);
+ SimpleDDGNode(SimpleDDGNode &&N);
+ ~SimpleDDGNode();
+
+ SimpleDDGNode &operator=(const SimpleDDGNode &N) {
+ DDGNode::operator=(N);
+ InstList = N.InstList;
+ return *this;
+ }
+
+ SimpleDDGNode &operator=(SimpleDDGNode &&N) {
+ DDGNode::operator=(std::move(N));
+ InstList = std::move(N.InstList);
+ return *this;
+ }
+
+ /// Get the list of instructions in this node.
+ const InstructionListType &getInstructions() const {
+ assert(!InstList.empty() && "Instruction List is empty.");
+ return InstList;
+ }
+ InstructionListType &getInstructions() {
+ return const_cast<InstructionListType &>(
+ static_cast<const SimpleDDGNode *>(this)->getInstructions());
+ }
+
+ /// Get the first/last instruction in the node.
+ Instruction *getFirstInstruction() const { return getInstructions().front(); }
+ Instruction *getLastInstruction() const { return getInstructions().back(); }
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::SingleInstruction ||
+ N->getKind() == NodeKind::MultiInstruction;
+ }
+ static bool classof(const SimpleDDGNode *N) { return true; }
+
+private:
+ /// Append the list of instructions in \p Input to this node.
+ void appendInstructions(const InstructionListType &Input) {
+ setKind((InstList.size() == 0 && Input.size() == 1)
+ ? NodeKind::SingleInstruction
+ : NodeKind::MultiInstruction);
+ InstList.insert(InstList.end(), Input.begin(), Input.end());
+ }
+ void appendInstructions(const SimpleDDGNode &Input) {
+ appendInstructions(Input.getInstructions());
+ }
+
+ /// List of instructions associated with a single or multi-instruction node.
+ SmallVector<Instruction *, 2> InstList;
+};
+
+/// Data Dependency Graph Edge.
+/// An edge in the DDG can represent a def-use relationship or
+/// a memory dependence based on the result of DependenceAnalysis.
+/// A rooted edge connects the root node to one of the components
+/// of the graph.
+class DDGEdge : public DDGEdgeBase {
+public:
+ /// The kind of edge in the DDG
+ enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted };
+
+ explicit DDGEdge(DDGNode &N) = delete;
+ DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
+ DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
+ DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
+ DDGEdge &operator=(const DDGEdge &E) {
+ DDGEdgeBase::operator=(E);
+ Kind = E.Kind;
+ return *this;
+ }
+
+ DDGEdge &operator=(DDGEdge &&E) {
+ DDGEdgeBase::operator=(std::move(E));
+ Kind = E.Kind;
+ return *this;
+ }
+
+ /// Get the edge kind
+ EdgeKind getKind() const { return Kind; }
+
+ /// Return true if this is a def-use edge, and false otherwise.
+ bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; }
+
+ /// Return true if this is a memory dependence edge, and false otherwise.
+ bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; }
+
+ /// Return true if this is an edge stemming from the root node, and false
+ /// otherwise.
+ bool isRooted() const { return Kind == EdgeKind::Rooted; }
+
+private:
+ EdgeKind Kind;
+};
+
+/// Encapsulate some common data and functionality needed for different
+/// variations of data dependence graphs.
+template <typename NodeType> class DependenceGraphInfo {
+public:
+ using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>;
+
+ DependenceGraphInfo() = delete;
+ DependenceGraphInfo(const DependenceGraphInfo &G) = delete;
+ DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo)
+ : Name(N), DI(DepInfo), Root(nullptr) {}
+ DependenceGraphInfo(DependenceGraphInfo &&G)
+ : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
+ virtual ~DependenceGraphInfo() {}
+
+ /// Return the label that is used to name this graph.
+ const StringRef getName() const { return Name; }
+
+ /// Return the root node of the graph.
+ NodeType &getRoot() const {
+ assert(Root && "Root node is not available yet. Graph construction may "
+ "still be in progress\n");
+ return *Root;
+ }
+
+protected:
+ // Name of the graph.
+ std::string Name;
+
+ // Store a copy of DependenceInfo in the graph, so that individual memory
+ // dependencies don't need to be stored. Instead when the dependence is
+ // queried it is recomputed using @DI.
+ const DependenceInfo DI;
+
+ // A special node in the graph that has an edge to every connected component of
+ // the graph, to ensure all nodes are reachable in a graph walk.
+ NodeType *Root = nullptr;
+};
+
+using DDGInfo = DependenceGraphInfo<DDGNode>;
+
+/// Data Dependency Graph
+class DataDependenceGraph : public DDGBase, public DDGInfo {
+ friend class DDGBuilder;
+
+public:
+ using NodeType = DDGNode;
+ using EdgeType = DDGEdge;
+
+ DataDependenceGraph() = delete;
+ DataDependenceGraph(const DataDependenceGraph &G) = delete;
+ DataDependenceGraph(DataDependenceGraph &&G)
+ : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
+ DataDependenceGraph(Function &F, DependenceInfo &DI);
+ DataDependenceGraph(const Loop &L, DependenceInfo &DI);
+ ~DataDependenceGraph();
+
+protected:
+ /// Add node \p N to the graph, if it's not added yet, and keep track of
+ /// the root node. Return true if node is successfully added.
+ bool addNode(NodeType &N);
+
+};
+
+/// Concrete implementation of a pure data dependence graph builder. This class
+/// provides custom implementation for the pure-virtual functions used in the
+/// generic dependence graph build algorithm.
+///
+/// For information about time complexity of the build algorithm see the
+/// comments near the declaration of AbstractDependenceGraphBuilder.
+class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
+public:
+ DDGBuilder(DataDependenceGraph &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : AbstractDependenceGraphBuilder(G, D, BBs) {}
+ DDGNode &createRootNode() final override {
+ auto *RN = new RootDDGNode();
+ assert(RN && "Failed to allocate memory for DDG root node.");
+ Graph.addNode(*RN);
+ return *RN;
+ }
+ DDGNode &createFineGrainedNode(Instruction &I) final override {
+ auto *SN = new SimpleDDGNode(I);
+ assert(SN && "Failed to allocate memory for simple DDG node.");
+ Graph.addNode(*SN);
+ return *SN;
+ }
+ DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted);
+ assert(E && "Failed to allocate memory for edge");
+ assert(isa<RootDDGNode>(Src) && "Expected root node");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G);
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// Analysis pass that builds the DDG for a loop.
+class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> {
+public:
+ using Result = std::unique_ptr<DataDependenceGraph>;
+ Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+
+private:
+ friend AnalysisInfoMixin<DDGAnalysis>;
+ static AnalysisKey Key;
+};
+
+/// Textual printer pass for the DDG of a loop.
+class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> {
+public:
+ explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+private:
+ raw_ostream &OS;
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for the DDG
+//===--------------------------------------------------------------------===//
+
+/// Non-const versions of the graph trait specializations for the DDG.
+template <> struct GraphTraits<DDGNode *> {
+ using NodeRef = DDGNode *;
+
+ static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::iterator;
+ static NodeRef getEntryNode(DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); }
+};
+
+/// Const versions of the graph trait specializations for the DDG.
+template <> struct GraphTraits<const DDGNode *> {
+ using NodeRef = const DDGNode *;
+
+ static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::const_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<const DataDependenceGraph *>
+ : public GraphTraits<const DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::const_iterator;
+ static NodeRef getEntryNode(const DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(const DataDependenceGraph *DG) {
+ return DG->end();
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDG_H
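A minimal usage sketch (not from the patch) for the header above: build a DDG for a loop and walk it through the GraphTraits specializations. It assumes a Loop and DependenceInfo obtained from the usual analyses; error handling is omitted.

  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/Analysis/DDG.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void dumpLoopDDG(const Loop &L, DependenceInfo &DI, raw_ostream &OS) {
    DataDependenceGraph G(L, DI);
    // The root node has an edge to every connected component, so a DFS
    // from it reaches the whole graph.
    for (DDGNode *N : depth_first(&G.getRoot()))
      if (const auto *SN = dyn_cast<SimpleDDGNode>(N))
        OS << *SN << "\n";
  }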
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 0410a3314659..c9e8df5db1c2 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -99,7 +99,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
@@ -162,7 +162,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
if (!EC)
diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h
new file mode 100644
index 000000000000..5f4bdb47043b
--- /dev/null
+++ b/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -0,0 +1,119 @@
+//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a builder interface that can be used to populate dependence
+// graphs such as DDG and PDG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// This abstract builder class defines a set of high-level steps for creating
+/// DDG-like graphs. The client code is expected to inherit from this class and
+/// define concrete implementation for each of the pure virtual functions used
+/// in the high-level algorithm.
+template <class GraphType> class AbstractDependenceGraphBuilder {
+protected:
+ using BasicBlockListType = SmallVectorImpl<BasicBlock *>;
+
+private:
+ using NodeType = typename GraphType::NodeType;
+ using EdgeType = typename GraphType::EdgeType;
+
+public:
+ using ClassesType = EquivalenceClasses<BasicBlock *>;
+ using NodeListType = SmallVector<NodeType *, 4>;
+
+ AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : Graph(G), DI(D), BBList(BBs) {}
+ virtual ~AbstractDependenceGraphBuilder() {}
+
+ /// The main entry to the graph construction algorithm. It starts by
+ /// creating nodes in increasing order of granularity and then
+ /// adds def-use and memory edges.
+ ///
+ /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V
+/// is the number of vertices (nodes) and I is the number of instructions in
+/// each node. The total number of instructions, N, is equal to V * I,
+/// therefore the worst-case time complexity is O(N^2). The average case
+/// does roughly half of that work, which is still O(N^2).
+ void populate() {
+ createFineGrainedNodes();
+ createDefUseEdges();
+ createMemoryDependencyEdges();
+ createAndConnectRootNode();
+ }
+
+ /// Create fine grained nodes. These are typically atomic nodes that
+ /// consist of a single instruction.
+ void createFineGrainedNodes();
+
+ /// Analyze the def-use chains and create edges from the nodes containing
+ /// definitions to the nodes containing the uses.
+ void createDefUseEdges();
+
+ /// Analyze the data dependencies that exist between memory loads and stores
+ /// in the graph nodes, and create edges between them.
+ void createMemoryDependencyEdges();
+
+ /// Create a root node and add edges such that each node in the graph is
+ /// reachable from the root.
+ void createAndConnectRootNode();
+
+protected:
+ /// Create the root node of the graph.
+ virtual NodeType &createRootNode() = 0;
+
+ /// Create an atomic node in the graph given a single instruction.
+ virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+
+ /// Create a def-use edge going from \p Src to \p Tgt.
+ virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a memory dependence edge going from \p Src to \p Tgt.
+ virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a rooted edge going from \p Src to \p Tgt.
+ virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Deallocate memory of edge \p E.
+ virtual void destroyEdge(EdgeType &E) { delete &E; }
+
+ /// Deallocate memory of node \p N.
+ virtual void destroyNode(NodeType &N) { delete &N; }
+
+ /// Map types to map instructions to nodes used when populating the graph.
+ using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+
+ /// Reference to the graph that gets built by a concrete implementation of
+ /// this builder.
+ GraphType &Graph;
+
+ /// Dependence information used to create memory dependence edges in the
+ /// graph.
+ DependenceInfo &DI;
+
+ /// The list of basic blocks to consider when building the graph.
+ const BasicBlockListType &BBList;
+
+ /// A mapping from instructions to the corresponding nodes in the graph.
+ InstToNodeMap IMap;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
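For the abstract builder above, a sketch of how a client might drive the phases through the concrete DDGBuilder declared in llvm/Analysis/DDG.h; populate() runs them in the documented order: fine-grained nodes, def-use edges, memory edges, then the root node.

  #include "llvm/Analysis/DDG.h"

  using namespace llvm;

  // The graph and the basic-block list are owned by the caller; how the
  // blocks are collected (function body, loop blocks, ...) is up to the
  // client.
  void populateDDG(DataDependenceGraph &G, DependenceInfo &DI,
                   SmallVectorImpl<BasicBlock *> &BBs) {
    DDGBuilder(G, DI, BBs).populate();
  }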
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h
index 3cfb9d13df94..2fac9c8b4b34 100644
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -73,9 +73,12 @@ public:
/// operands
bool isAlwaysUniform(const Value &Val) const;
- /// \brief Whether \p Val is a divergent value
+ /// \brief Whether \p Val is divergent at its definition.
bool isDivergent(const Value &Val) const;
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
void print(raw_ostream &OS, const Module *) const;
private:
@@ -189,12 +192,19 @@ public:
/// The GPU kernel this analysis result is for
const Function &getFunction() const { return DA.getFunction(); }
- /// Whether \p V is divergent.
+ /// Whether \p V is divergent at its definition.
bool isDivergent(const Value &V) const;
- /// Whether \p V is uniform/non-divergent
+ /// Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
+ /// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
+ /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
+ /// divergent.
+ bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+
/// Print all divergent values in the kernel.
void print(raw_ostream &OS, const Module *) const;
};
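A small illustration (not from the patch) of why the value/use split matters: the definition and a particular use of the same value can disagree, so clients should query the use rather than assume uniformity from the definition.

  #include "llvm/Analysis/DivergenceAnalysis.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void printDivergence(const DivergenceAnalysis &DA, const Use &U,
                       raw_ostream &OS) {
    const Value &V = *U.get();
    OS << "def: " << (DA.isDivergent(V) ? "divergent" : "uniform")
       << ", use: " << (DA.isDivergentUse(U) ? "divergent" : "uniform")
       << "\n";
  }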
diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h
index d3fcfc2d41ab..5d1c5a05206a 100644
--- a/include/llvm/Analysis/GlobalsModRef.h
+++ b/include/llvm/Analysis/GlobalsModRef.h
@@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
class FunctionInfo;
const DataLayout &DL;
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// The globals that do not have their addresses taken.
SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
@@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
/// could perform to the memory utilization here if this becomes a problem.
std::list<DeletionCallbackHandle> Handles;
- explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI);
+ explicit GlobalsAAResult(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
public:
GlobalsAAResult(GlobalsAAResult &&Arg);
~GlobalsAAResult();
- static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI,
- CallGraph &CG);
+ static GlobalsAAResult
+ analyzeModule(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ CallGraph &CG);
//------------------------------------------------
// Implement the AliasAnalysis API
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 054ffca7215e..a5ffca13046b 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -31,6 +31,7 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
@@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q);
+/// Given operands for the multiplication of a FMA, fold the result or return
+/// null. In contrast to SimplifyFMulInst, this function will not perform
+/// simplifications whose unrounded results differ when rounded to the argument
+/// type.
+Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for a Mul, fold the result or return null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
@@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
/// Given operand for a UnaryOperator, fold the result or return null.
Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
-/// Given operand for an FP UnaryOperator, fold the result or return null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
-Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q);
+/// Given operand for a UnaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
/// Given operands for a BinaryOperator, fold the result or return null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
-/// Given operands for an FP BinaryOperator, fold the result or return null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q);
+/// Given operands for a BinaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
@@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
/// This first performs a normal RAUW of I with SimpleV. It then recursively
/// attempts to simplify those users updated by the operation. The 'I'
/// instruction must not be equal to the simplified value 'SimpleV'.
+/// If UnsimplifiedUsers is provided, instructions that could not be simplified
+/// are added to it.
///
/// The function returns true if any simplifications were performed.
-bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI = nullptr,
- const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
+bool replaceAndRecursivelySimplify(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr);
/// Recursively attempt to simplify an instruction.
///
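A short usage sketch for the extended overload above: collect the users that resisted simplification so the caller can revisit them later. The surrounding transform is assumed to supply I and SimpleV.

  #include "llvm/ADT/SetVector.h"
  #include "llvm/Analysis/InstructionSimplify.h"

  using namespace llvm;

  void replaceAndTrack(Instruction *I, Value *SimpleV) {
    SmallSetVector<Instruction *, 8> UnsimplifiedUsers;
    replaceAndRecursivelySimplify(I, SimpleV, /*TLI=*/nullptr, /*DT=*/nullptr,
                                  /*AC=*/nullptr, &UnsimplifiedUsers);
    // Users that were rewritten but not folded away; queue them for any
    // follow-up processing the client needs.
    for (Instruction *U : UnsimplifiedUsers)
      (void)U;
  }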
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index 2d83929211e2..20a35bef189b 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -931,7 +931,8 @@ public:
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
- LazyCallGraph(Module &M, TargetLibraryInfo &TLI);
+ LazyCallGraph(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
@@ -1267,7 +1268,12 @@ public:
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
- return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return LazyCallGraph(M, GetTLI);
}
};
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 0a338b816640..e33b8f4129f3 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ public:
void print(raw_ostream &OS, const Module *) const override;
// Returns true if V is divergent at its definition.
- //
- // Even if this function returns false, V may still be divergent when used
- // in a different basic block.
bool isDivergent(const Value *V) const;
+ // Returns true if U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use *U) const;
+
// Returns true if V is uniform/non-divergent.
- //
- // Even if this function returns true, V may still be divergent when used
- // in a different basic block.
bool isUniform(const Value *V) const { return !isDivergent(V); }
+ // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
+ // divergent.
+ bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
+
// Keep the analysis results up to date by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }
@@ -62,6 +63,9 @@ private:
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
+
+ // Stores divergent uses of possibly uniform values.
+ DenseSet<const Use *> DivergentUses;
};
} // End llvm namespace
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 5df6bb02308d..9604b2521e89 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,7 +20,9 @@
namespace llvm {
class DataLayout;
+class Loop;
class MDNode;
+class ScalarEvolution;
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
@@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- unsigned Align, const DataLayout &DL,
+ MaybeAlign Alignment,
+ const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
/// greater or equal than requested. If the context instruction is specified
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
+/// Return true if we can prove that the given load (which is assumed to be
+/// within the specified loop) would access only dereferenceable memory, and
+/// be properly aligned on every iteration of the specified loop regardless of
+/// its placement within the loop. (i.e. does not require predication beyond
+/// that required by the header itself and could be hoisted into the header
+/// if desired.) This is more powerful than the variants above when the
+/// address loaded from is analyzable by SCEV.
+bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT);
+
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
@@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
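A hedged sketch of using the new loop-aware query above, for instance to decide whether a load may be hoisted or speculated without predication. The load, loop, and analyses are assumed to come from the surrounding pass.

  #include "llvm/Analysis/Loads.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  bool canHoistLoad(LoadInst *LI, Loop *L, ScalarEvolution &SE,
                    DominatorTree &DT) {
    // True only if the address is dereferenceable and aligned on every
    // iteration, wherever the load sits inside the loop body.
    return isDereferenceableAndAlignedInLoop(LI, L, SE, DT);
  }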
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 368a810cfa67..a2e65a7310af 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
template <> class LoopAnalysisManagerFunctionProxy::Result {
public:
explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
- : InnerAM(&InnerAM), LI(&LI) {}
- Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) {
+ : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {}
+ Result(Result &&Arg)
+ : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -96,6 +97,7 @@ public:
Result &operator=(Result &&RHS) {
InnerAM = RHS.InnerAM;
LI = RHS.LI;
+ MSSAUsed = RHS.MSSAUsed;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -112,6 +114,9 @@ public:
InnerAM->clear();
}
+ /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated.
+ void markMSSAUsed() { MSSAUsed = true; }
+
/// Accessor for the analysis manager.
LoopAnalysisManager &getManager() { return *InnerAM; }
@@ -130,6 +135,7 @@ public:
private:
LoopAnalysisManager *InnerAM;
LoopInfo *LI;
+ bool MSSAUsed;
};
/// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h
new file mode 100644
index 000000000000..ffec78b6db2c
--- /dev/null
+++ b/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -0,0 +1,281 @@
+//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the interface for the loop cache analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class LPMUpdater;
+using CacheCostTy = int64_t;
+using LoopVectorTy = SmallVector<Loop *, 8>;
+
+/// Represents a memory reference as a base pointer and a set of indexing
+/// operations. For example given the array reference A[i][2j+1][3k+2] in a
+/// 3-dim loop nest:
+/// for(i=0;i<n;++i)
+/// for(j=0;j<m;++j)
+/// for(k=0;k<o;++k)
+/// ... A[i][2j+1][3k+2] ...
+/// We expect:
+/// BasePointer -> A
+/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
+/// Sizes -> [m][o][4]
+class IndexedReference {
+ friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+
+public:
+ /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
+ IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
+ ScalarEvolution &SE);
+
+ bool isValid() const { return IsValid; }
+ const SCEV *getBasePointer() const { return BasePointer; }
+ size_t getNumSubscripts() const { return Subscripts.size(); }
+ const SCEV *getSubscript(unsigned SubNum) const {
+ assert(SubNum < getNumSubscripts() && "Invalid subscript number");
+ return Subscripts[SubNum];
+ }
+ const SCEV *getFirstSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.front();
+ }
+ const SCEV *getLastSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.back();
+ }
+
+ /// Return true/false if the current object and the indexed reference \p Other
+ /// are/aren't in the same cache line of size \p CLS. Two references are in
+ /// the same cache line iff the distance between them in the innermost
+ /// dimension is less than the cache line size. Return None if unsure.
+ Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
+ AliasAnalysis &AA) const;
+
+ /// Return true if the current object and the indexed reference \p Other
+ /// have distance smaller than \p MaxDistance in the dimension associated with
+ /// the given loop \p L. Return false if the distance is not smaller than \p
+ /// MaxDistance and None if unsure.
+ Optional<bool> hasTemporalReuse(const IndexedReference &Other,
+ unsigned MaxDistance, const Loop &L,
+ DependenceInfo &DI, AliasAnalysis &AA) const;
+
+ /// Compute the cost of the reference w.r.t. the given loop \p L when it is
+ /// considered in the innermost position in the loop nest.
+ /// The cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if:
+ /// + the reference stride is less than the cache line size, and
+ /// + the coefficient of this loop's index variable used in all other
+ /// subscripts is zero
+ /// - or otherwise equal to 'TripCount'.
+ CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
+
+private:
+ /// Attempt to delinearize the indexed reference.
+ bool delinearize(const LoopInfo &LI);
+
+ /// Return true if the indexed reference is invariant with respect to loop \p L.
+ bool isLoopInvariant(const Loop &L) const;
+
+ /// Return true if the indexed reference is 'consecutive' in loop \p L.
+ /// An indexed reference is 'consecutive' if the only coefficient that uses
+ /// the loop induction variable is the rightmost one, and the access stride is
+ /// smaller than the cache line size \p CLS.
+ bool isConsecutive(const Loop &L, unsigned CLS) const;
+
+ /// Return the coefficient used in the rightmost dimension.
+ const SCEV *getLastCoefficient() const;
+
+ /// Return true if the coefficient corresponding to induction variable of
+ /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
+ bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+ const Loop &L) const;
+
+ /// Verify that the given \p Subscript is 'well formed' (must be a simple add
+ /// recurrence).
+ bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
+
+ /// Return true if the given reference \p Other is definitely aliased with
+ /// the indexed reference represented by this class.
+ bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+
+private:
+ /// True if the reference can be delinearized, false otherwise.
+ bool IsValid = false;
+
+ /// Represent the memory reference instruction.
+ Instruction &StoreOrLoadInst;
+
+ /// The base pointer of the memory reference.
+ const SCEV *BasePointer = nullptr;
+
+ /// The subscript (indexes) of the memory reference.
+ SmallVector<const SCEV *, 3> Subscripts;
+
+ /// The dimensions of the memory reference.
+ SmallVector<const SCEV *, 3> Sizes;
+
+ ScalarEvolution &SE;
+};
+
+/// A reference group represents a set of memory references that exhibit
+/// temporal or spatial reuse. Two references belong to the same
+/// reference group with respect to an inner loop L iff:
+/// 1. they have a loop independent dependency, or
+/// 2. they have a loop carried dependence with a small dependence distance
+/// (e.g. less than 2) carried by the inner loop, or
+/// 3. they refer to the same array, and the subscript in their innermost
+/// dimension is less than or equal to 'd' (where 'd' is less than the cache
+/// line size)
+///
+/// Intuitively a reference group represents memory references that access
+/// the same cache line. Conditions 1,2 above account for temporal reuse, while
+/// condition 3 accounts for spatial reuse.
+using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
+using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;
+
+/// \c CacheCost represents the estimated cost of an inner loop as the number of
+/// cache lines used by the memory references it contains.
+/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
+/// the cache costs of all of its reference groups when the loop is considered
+/// to be in the innermost position in the nest.
+/// A reference group represents memory references that fall into the same cache
+/// line. Each reference group is analysed with respect to the innermost loop in
+/// a loop nest. The cost of a reference is defined as follows:
+/// - one if it is loop invariant w.r.t the innermost loop,
+/// - equal to the loop trip count divided by the cache line times the
+/// reference stride if the reference stride is less than the cache line
+/// size (CLS), and the coefficient of this loop's index variable used in all
+/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
+/// - equal to the innermost loop trip count if the reference stride is greater
+/// or equal to the cache line size CLS.
+class CacheCost {
+ friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+ using LoopTripCountTy = std::pair<const Loop *, unsigned>;
+ using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
+
+public:
+ static CacheCostTy constexpr InvalidCost = -1;
+
+ /// Construct a CacheCost object for the loop nest described by \p Loops.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
+ TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Create a CacheCost for the loop nest rooted by \p Root.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ static std::unique_ptr<CacheCost>
+ getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Return the estimated cost of loop \p L if the given loop is part of the
+ /// loop nest associated with this object. Return -1 otherwise.
+ CacheCostTy getLoopCost(const Loop &L) const {
+ auto IT = std::find_if(
+ LoopCosts.begin(), LoopCosts.end(),
+ [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; });
+ return (IT != LoopCosts.end()) ? (*IT).second : -1;
+ }
+
+ /// Return the estimated ordered loop costs.
+ const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
+
+private:
+ /// Calculate the cache footprint of each loop in the nest (when it is
+ /// considered to be in the innermost position).
+ void calculateCacheFootprint();
+
+ /// Partition store/load instructions in the loop nest into reference groups.
+ /// Two or more memory accesses belong in the same reference group if they
+ /// share the same cache line.
+ bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
+
+ /// Calculate the cost of the given loop \p L assuming it is the innermost
+ /// loop in nest.
+ CacheCostTy computeLoopCacheCost(const Loop &L,
+ const ReferenceGroupsTy &RefGroups) const;
+
+ /// Compute the cost of a representative reference in reference group \p RG
+ /// when the given loop \p L is considered as the innermost loop in the nest.
+ /// The computed cost is an estimate for the number of cache lines used by the
+ /// reference group. The representative reference cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
+ /// induction variable is used only in the reference subscript associated
+ /// with loop \p L, and (b) the reference stride is less than the cache
+ /// line size, or
+ /// - TripCount otherwise
+ CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
+ const Loop &L) const;
+
+ /// Sort the LoopCosts vector by decreasing cache cost.
+ void sortLoopCosts() {
+ sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
+ return A.second > B.second;
+ });
+ }
+
+private:
+ /// Loops in the loop nest associated with this object.
+ LoopVectorTy Loops;
+
+ /// Trip counts for the loops in the loop nest associated with this object.
+ SmallVector<LoopTripCountTy, 3> TripCounts;
+
+ /// Cache costs for the loops in the loop nest associated with this object.
+ SmallVector<LoopCacheCostTy, 3> LoopCosts;
+
+ /// The max. distance between array elements accessed in a loop so that the
+ /// elements are classified to have temporal reuse.
+ Optional<unsigned> TRT;
+
+ const LoopInfo &LI;
+ ScalarEvolution &SE;
+ TargetTransformInfo &TTI;
+ AliasAnalysis &AA;
+ DependenceInfo &DI;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+
+/// Printer pass for the \c CacheCost results.
+class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
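A minimal sketch of querying the analysis above from a loop pass: build the cache cost for the nest rooted at Root and read back the per-loop ordering. AR and DI are assumed to be provided by the pass manager.

  #include "llvm/Analysis/LoopCacheAnalysis.h"

  using namespace llvm;

  void reportCacheCosts(Loop &Root, LoopStandardAnalysisResults &AR,
                        DependenceInfo &DI, raw_ostream &OS) {
    std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(Root, AR, DI);
    if (!CC)
      return; // The nest could not be analyzed.
    // Costs are sorted in decreasing order: the first loop is the one
    // that would touch the most cache lines if placed innermost.
    for (const auto &LC : CC->getLoopCosts())
      OS << LC.first->getName() << ": " << LC.second << "\n";
  }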
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 584eb3a8c854..abf3863b0601 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -30,6 +30,9 @@
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
//
+// For an overview of terminology used in this API (and thus all of our loop
+// analyses or transforms), see docs/LoopTerminology.rst.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPINFO_H
@@ -570,9 +573,9 @@ public:
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
BasicBlock *&Backedge) const;
- /// Below are some utilities to get loop bounds and induction variable, and
- /// check if a given phinode is an auxiliary induction variable, as well as
- /// checking if the loop is canonical.
+ /// Below are some utilities to get the loop guard, loop bounds and induction
+ /// variable, and to check if a given phinode is an auxiliary induction
+ /// variable, if the loop is guarded, and if the loop is canonical.
///
/// Here is an example:
/// \code
@@ -604,6 +607,9 @@ public:
///
/// - getInductionVariable --> i_1
/// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+ /// - getLoopGuardBranch()
+ /// --> `if (guardcmp) goto preheader; else goto afterloop`
+ /// - isGuarded() --> true
/// - isCanonical --> false
struct LoopBounds {
/// Return the LoopBounds object if
@@ -725,6 +731,31 @@ public:
bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
ScalarEvolution &SE) const;
+ /// Return the loop guard branch, if it exists.
+ ///
+  /// This currently only works on simplified loops, as it requires a preheader
+ /// and a latch to identify the guard. It will work on loops of the form:
+ /// \code
+ /// GuardBB:
+ /// br cond1, Preheader, ExitSucc <== GuardBranch
+ /// Preheader:
+ /// br Header
+ /// Header:
+ /// ...
+ /// br Latch
+ /// Latch:
+ /// br cond2, Header, ExitBlock
+ /// ExitBlock:
+ /// br ExitSucc
+ /// ExitSucc:
+ /// \endcode
+ BranchInst *getLoopGuardBranch() const;
+
+ /// Return true iff the loop is
+  /// - in simplified and rotated form, and
+ /// - guarded by a loop guard branch.
+ bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
+
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
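
A short sketch of the new guard query in a transform; Setup stands in for a
hypothetical loop-invariant instruction:

    // Only hoist above the guard when the guarded shape shown above was
    // actually recognized.
    if (BranchInst *Guard = L->getLoopGuardBranch()) {
      assert(L->isGuarded() && "guard branch implies a guarded loop");
      Setup->moveBefore(Guard); // hypothetical loop-invariant setup code
    }
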
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 4c33dac9e21e..8b11e848a195 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -85,9 +85,9 @@ template <class BlockT, class LoopT>
bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
- SmallVector<BlockT *, 4> ExitBlocks;
- getExitBlocks(ExitBlocks);
- for (BlockT *EB : ExitBlocks)
+ SmallVector<BlockT *, 4> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ for (BlockT *EB : UniqueExitBlocks)
for (BlockT *Predecessor : children<Inverse<BlockT *>>(EB))
if (!contains(Predecessor))
return false;
@@ -200,8 +200,6 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
}
}
- // Make sure there is only one exit out of the preheader.
- assert(Out && "Header of loop has no predecessors from outside loop?");
return Out;
}
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 49f9e58ffad7..a89d76b9e5bd 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -58,6 +58,9 @@ class Value;
/// like).
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isAllocationFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
@@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
/// allocates uninitialized memory (such as malloc).
bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isMallocLikeFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
@@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
/// reallocates memory (e.g., realloc).
bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory and throws if an allocation failed (e.g., new).
+bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory (strdup, strndup).
+bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
@@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI);
-inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) {
- return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI));
+const CallInst *
+extractMallocCall(const Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+inline CallInst *
+extractMallocCall(Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return const_cast<CallInst *>(extractMallocCall((const Value *)I, GetTLI));
}
/// getMallocType - Returns the PointerType resulting from the malloc call.
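
A sketch of the callback-based overloads from a legacy pass; the lambda shape
is the intended usage, while the surrounding pass and visitMallocSite are
assumed:

    auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
      return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    if (isAllocationFn(V, GetTLI))
      if (CallInst *MC = extractMallocCall(const_cast<Value *>(V), GetTLI))
        visitMallocSite(MC); // hypothetical handler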
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index e2669c2fa601..e89e5690fad0 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -362,11 +362,14 @@ private:
PhiValues &PV;
PredIteratorCache PredCache;
+ unsigned DefaultBlockScanLimit;
+
public:
MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC,
- const TargetLibraryInfo &TLI,
- DominatorTree &DT, PhiValues &PV)
- : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {}
+ const TargetLibraryInfo &TLI, DominatorTree &DT,
+ PhiValues &PV, unsigned DefaultBlockScanLimit)
+ : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV),
+ DefaultBlockScanLimit(DefaultBlockScanLimit) {}
/// Handle invalidation in the new PM.
bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -511,9 +514,14 @@ class MemoryDependenceAnalysis
static AnalysisKey Key;
+ unsigned DefaultBlockScanLimit;
+
public:
using Result = MemoryDependenceResults;
+ MemoryDependenceAnalysis();
+  MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit)
+      : DefaultBlockScanLimit(DefaultBlockScanLimit) {}
+
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM);
};
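
With the extra constructor a pipeline can override the scan limit; a minimal
sketch, with the limit value chosen arbitrarily:

    FunctionAnalysisManager FAM;
    // Scan at most 500 instructions per block instead of the built-in
    // default.
    FAM.registerPass([] {
      return MemoryDependenceAnalysis(/*DefaultBlockScanLimit=*/500);
    });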
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index b7730be75354..e89bf26a7234 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -793,6 +793,7 @@ protected:
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
+ void verifyPrevDefInPhis(Function &F) const;
void verifyDefUses(Function &F) const;
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
@@ -830,7 +831,8 @@ protected:
void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
AccessList::iterator);
MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
- const MemoryUseOrDef *Template = nullptr);
+ const MemoryUseOrDef *Template = nullptr,
+ bool CreationMustSucceed = true);
private:
template <class AliasAnalysisType> class ClobberWalkerBase;
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index d4d8040c1ff6..1d34663721e3 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -99,7 +99,7 @@ public:
/// load a
/// Where a mayalias b, *does* require RenameUses be set to true.
void insertDef(MemoryDef *Def, bool RenameUses = false);
- void insertUse(MemoryUse *Use);
+ void insertUse(MemoryUse *Use, bool RenameUses = false);
/// Update the MemoryPhi in `To` following an edge deletion between `From` and
/// `To`. If `To` becomes unreachable, a call to removeBlocks should be made.
void removeEdge(BasicBlock *From, BasicBlock *To);
@@ -275,6 +275,7 @@ private:
getPreviousDefRecursive(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *recursePhi(MemoryAccess *Phi);
+ MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
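
A sketch of the new RenameUses parameter, assuming a transform that has just
cloned a load NewLoad into block BB while keeping MemorySSA MSSA up to date:

    MemorySSAUpdater MSSAU(&MSSA);
    MemoryAccess *NewAccess = MSSAU.createMemoryAccessInBB(
        NewLoad, /*Definition=*/nullptr, BB, MemorySSA::End);
    // Inserted uses can now request renaming, mirroring insertDef.
    MSSAU.insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);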
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index 3ef539c89d97..87cf9f85c7f1 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -7,10 +7,17 @@
//===----------------------------------------------------------------------===//
/// \file
/// Contains a collection of routines for determining if a given instruction is
-/// guaranteed to execute if a given point in control flow is reached. The most
+/// guaranteed to execute if a given point in control flow is reached. The most
/// common example is an instruction within a loop being provably executed if we
 /// branch to the header of its containing loop.
///
+/// There are two interfaces available to determine if an instruction is
+/// executed once a given point in the control flow is reached:
+/// 1) A loop-centric one derived from LoopSafetyInfo.
+/// 2) A "must be executed context"-based one implemented in the
+/// MustBeExecutedContextExplorer.
+/// Please refer to the class comments for more information.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
@@ -164,6 +171,280 @@ public:
virtual ~ICFLoopSafetyInfo() {};
};
-}
+struct MustBeExecutedContextExplorer;
+
+/// Must be executed iterators visit stretches of instructions that are
+/// guaranteed to be executed together, potentially with other instructions
+/// executed in between.
+///
+/// Given the following code, and assuming all statements are single
+/// instructions which transfer execution to the successor (see
+/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
+/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
+/// and E. If we start at C or D, we will visit all instructions A-E.
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// \endcode
+///
+///
+/// Below is the example extended with instructions F and G. Now we assume F
+/// might not transfer execution to its successor G. As a result we get the
+/// following visit sets:
+///
+/// Start Instruction | Visit Set
+/// A | A, B, E, F
+/// B | A, B, E, F
+/// C | A, B, C, D, E, F
+/// D | A, B, C, D, E, F
+/// E | A, B, E, F
+/// F | A, B, E, F
+/// G | A, B, E, F, G
+///
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// F; // Might not transfer execution to its successor G.
+/// G;
+/// \endcode
+///
+///
+/// A more complex example involving conditionals, loops, break, and continue
+/// is shown below. We again assume all instructions will transfer execution to
+/// the successor and we assume we can prove the inner loop to be finite. We
+/// omit non-trivial branch conditions as the exploration is oblivious to them.
+/// Constant branches are assumed to be unconditional in the CFG. The resulting
+/// visit sets are shown in the table below.
+///
+/// \code
+/// A;
+/// while (true) {
+/// B;
+/// if (...)
+/// C;
+/// if (...)
+/// continue;
+/// D;
+/// if (...)
+/// break;
+/// do {
+/// if (...)
+/// continue;
+/// E;
+/// } while (...);
+/// F;
+/// }
+/// G;
+/// \endcode
+///
+/// Start Instruction | Visit Set
+/// A | A, B
+/// B | A, B
+/// C | A, B, C
+/// D | A, B, D
+/// E | A, B, D, E, F
+/// F | A, B, D, F
+/// G | A, B, D, G
+///
+///
+/// Note that the examples show optimal visit sets but not necessarily the ones
+/// derived by the explorer, depending on the available CFG analyses (see
+/// MustBeExecutedContextExplorer). Also note that, depending on the options,
+/// the visit set can contain instructions from other functions.
+struct MustBeExecutedIterator {
+  /// Type declarations that make this class an input iterator.
+ ///{
+ typedef const Instruction *value_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef const Instruction **pointer;
+ typedef const Instruction *&reference;
+ typedef std::input_iterator_tag iterator_category;
+ ///}
+
+ using ExplorerTy = MustBeExecutedContextExplorer;
+
+ MustBeExecutedIterator(const MustBeExecutedIterator &Other)
+ : Visited(Other.Visited), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator(MustBeExecutedIterator &&Other)
+ : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
+ if (this != &Other) {
+ std::swap(Visited, Other.Visited);
+ std::swap(CurInst, Other.CurInst);
+ }
+ return *this;
+ }
+
+ ~MustBeExecutedIterator() {}
+
+ /// Pre- and post-increment operators.
+ ///{
+ MustBeExecutedIterator &operator++() {
+ CurInst = advance();
+ return *this;
+ }
+
+ MustBeExecutedIterator operator++(int) {
+ MustBeExecutedIterator tmp(*this);
+ operator++();
+ return tmp;
+ }
+ ///}
+
+ /// Equality and inequality operators. Note that we ignore the history here.
+ ///{
+ bool operator==(const MustBeExecutedIterator &Other) const {
+ return CurInst == Other.CurInst;
+ }
+
+ bool operator!=(const MustBeExecutedIterator &Other) const {
+ return !(*this == Other);
+ }
+ ///}
+
+ /// Return the underlying instruction.
+ const Instruction *&operator*() { return CurInst; }
+ const Instruction *getCurrentInst() const { return CurInst; }
+
+ /// Return true if \p I was encountered by this iterator already.
+ bool count(const Instruction *I) const { return Visited.count(I); }
+
+private:
+ using VisitedSetTy = DenseSet<const Instruction *>;
+
+ /// Private constructors.
+ MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
+
+ /// Reset the iterator to its initial state pointing at \p I.
+ void reset(const Instruction *I);
+
+ /// Try to advance one of the underlying positions (Head or Tail).
+ ///
+ /// \return The next instruction in the must be executed context, or nullptr
+ /// if none was found.
+ const Instruction *advance();
+
+ /// A set to track the visited instructions in order to deal with endless
+ /// loops and recursion.
+ VisitedSetTy Visited;
+
+ /// A reference to the explorer that created this iterator.
+ ExplorerTy &Explorer;
+
+ /// The instruction we are currently exposing to the user. There is always an
+ /// instruction that we know is executed with the given program point,
+ /// initially the program point itself.
+ const Instruction *CurInst;
+
+ friend struct MustBeExecutedContextExplorer;
+};
+
+/// A "must be executed context" for a given program point PP is the set of
+/// instructions, potentially before and after PP, that are always executed when
+/// PP is reached. The MustBeExecutedContextExplorer offers an interface to
+/// explore
+/// "must be executed contexts" in a module through the use of
+/// MustBeExecutedIterator.
+///
+/// The explorer exposes "must be executed iterators" that traverse the must be
+/// executed context. There is little information sharing between iterators as
+/// the expected use case involves few iterators for "far apart" instructions.
+/// If that changes, we should consider caching more intermediate results.
+struct MustBeExecutedContextExplorer {
+
+ /// In the description of the parameters we use PP to denote a program point
+ /// for which the must be executed context is explored, or put differently,
+ /// for which the MustBeExecutedIterator is created.
+ ///
+ /// \param ExploreInterBlock Flag to indicate if instructions in blocks
+ /// other than the parent of PP should be
+ /// explored.
+ MustBeExecutedContextExplorer(bool ExploreInterBlock)
+ : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+
+ /// Clean up the dynamically allocated iterators.
+ ~MustBeExecutedContextExplorer() {
+ DeleteContainerSeconds(InstructionIteratorMap);
+ }
+
+ /// Iterator-based interface. \see MustBeExecutedIterator.
+ ///{
+ using iterator = MustBeExecutedIterator;
+ using const_iterator = const MustBeExecutedIterator;
+
+ /// Return an iterator to explore the context around \p PP.
+ iterator &begin(const Instruction *PP) {
+ auto *&It = InstructionIteratorMap[PP];
+ if (!It)
+ It = new iterator(*this, PP);
+ return *It;
+ }
+
+ /// Return an iterator to explore the cached context around \p PP.
+ const_iterator &begin(const Instruction *PP) const {
+ return *InstructionIteratorMap.lookup(PP);
+ }
+
+  /// Return a universal end iterator.
+ ///{
+ iterator &end() { return EndIterator; }
+ iterator &end(const Instruction *) { return EndIterator; }
+
+ const_iterator &end() const { return EndIterator; }
+ const_iterator &end(const Instruction *) const { return EndIterator; }
+ ///}
+
+ /// Return an iterator range to explore the context around \p PP.
+ llvm::iterator_range<iterator> range(const Instruction *PP) {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+
+ /// Return an iterator range to explore the cached context around \p PP.
+ llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+ ///}
+
+ /// Return the next instruction that is guaranteed to be executed after \p PP.
+ ///
+ /// \param It The iterator that is used to traverse the must be
+ /// executed context.
+ /// \param PP The program point for which the next instruction
+ /// that is guaranteed to execute is determined.
+ const Instruction *
+ getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
+ const Instruction *PP);
+
+  /// Parameters that limit the performed exploration. See the constructor for
+ /// their meaning.
+ ///{
+ const bool ExploreInterBlock;
+ ///}
+
+private:
+ /// Map from instructions to associated must be executed iterators.
+ DenseMap<const Instruction *, MustBeExecutedIterator *>
+ InstructionIteratorMap;
+
+ /// A unique end iterator.
+ MustBeExecutedIterator EndIterator;
+};
+
+} // namespace llvm
#endif
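
A minimal exploration sketch, assuming a program point PP of type
const Instruction *:

    MustBeExecutedContextExplorer Explorer(/*ExploreInterBlock=*/true);
    SmallPtrSet<const Instruction *, 16> MustExec;
    // Every instruction visited is known to execute whenever PP is reached.
    for (const Instruction *I : Explorer.range(PP))
      MustExec.insert(I);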
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index d9c97dff8c6e..8562519fa7b1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -103,6 +103,13 @@ namespace llvm {
//
FunctionPass *createMustExecutePrinter();
+ //===--------------------------------------------------------------------===//
+ //
+ // createMustBeExecutedContextPrinter - This pass prints information about which
+ // instructions are guaranteed to execute together (run with -analyze).
+ //
+ ModulePass *createMustBeExecutedContextPrinter();
+
}
#endif
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index f309d344b8d1..6693e40ccf22 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -52,6 +52,15 @@ private:
// because the number of profile counts required to reach the hot
// percentile is above a huge threshold.
Optional<bool> HasHugeWorkingSetSize;
+ // True if the working set size of the code is considered large,
+ // because the number of profile counts required to reach the hot
+ // percentile is above a large threshold.
+ Optional<bool> HasLargeWorkingSetSize;
+ // Compute the threshold for a given cutoff.
+ Optional<uint64_t> computeThreshold(int PercentileCutoff);
+ // The map that caches the threshold values. The keys are the percentile
+ // cutoff values and the values are the corresponding threshold values.
+ DenseMap<int, uint64_t> ThresholdCache;
public:
ProfileSummaryInfo(Module &M) : M(M) {}
@@ -96,6 +105,8 @@ public:
bool AllowSynthetic = false);
/// Returns true if the working set size of the code is considered huge.
bool hasHugeWorkingSetSize();
+ /// Returns true if the working set size of the code is considered large.
+ bool hasLargeWorkingSetSize();
/// Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
/// Returns true if \p F contains hot code.
@@ -104,14 +115,26 @@ public:
bool isFunctionEntryCold(const Function *F);
/// Returns true if \p F contains only cold code.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+ /// Returns true if \p F contains hot code with regard to a given hot
+ /// percentile cutoff value.
+ bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,
+ const Function *F,
+ BlockFrequencyInfo &BFI);
/// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
+ /// Returns true if count \p C is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C);
/// Returns true if BasicBlock \p BB is considered hot.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if BasicBlock \p BB is considered cold.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotBlockNthPercentile(int PercentileCutoff,
+ const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered hot.
bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
/// Returns true if Callsite \p CS is considered cold.
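
A sketch of gating on a percentile cutoff instead of the fixed hot threshold;
the value 990000 assumes the parts-per-million cutoff scale used by profile
summaries, and enableAggressiveUnrolling is a hypothetical hook:

    // Treat F as hot only relative to the 99th percentile.
    if (PSI->isFunctionHotInCallGraphNthPercentile(/*PercentileCutoff=*/990000,
                                                   &F, BFI))
      enableAggressiveUnrolling(F);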
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index c59c09dd2095..6b5936680c37 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
auto Deconst = const_cast<RegionBase<Tr> *>(this);
typename BBNodeMapT::value_type V = {
BB,
- llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+ std::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
at = BBNodeMap.insert(std::move(V)).first;
}
return at->second.get();
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 0bd98ef37e7a..9c55f7a5090f 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -468,6 +468,8 @@ template <> struct DenseMapInfo<ExitLimitQuery> {
/// can't do much with the SCEV objects directly, they must ask this class
/// for services.
class ScalarEvolution {
+ friend class ScalarEvolutionsTest;
+
public:
/// An enum describing the relationship between a SCEV and a loop.
enum LoopDisposition {
@@ -777,10 +779,10 @@ public:
 /// to (i.e. a "conservative over-approximation") of the value returned by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
/// SCEVCouldNotCompute object.
- const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+ const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L);
/// Return true if the backedge taken count is either the value returned by
- /// getMaxBackedgeTakenCount or zero.
+ /// getConstantMaxBackedgeTakenCount or zero.
bool isBackedgeTakenCountMaxOrZero(const Loop *L);
/// Return true if the specified loop has an analyzable loop-invariant
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a519f93216b3..b4d727449fbe 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@ namespace llvm {
/// Phis that complete an IV chain. Reuse
DenseSet<AssertingVH<PHINode>> ChainedPhis;
- /// When true, expressions are expanded in "canonical" form. In particular,
- /// addrecs are expanded as arithmetic based on a canonical induction
- /// variable. When false, expression are expanded in a more literal form.
+ /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+ /// When false, expressions are expanded in a more literal form.
+ ///
+ /// In "canonical" form addrecs are expanded as arithmetic based on a
+ /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+ /// that all expressions are expanded in "canonical" form. For some
+ /// expressions literal mode can be preferred.
bool CanonicalMode;
/// When invoked from LSR, the expander is in "strength reduction" mode. The
@@ -275,8 +279,16 @@ namespace llvm {
/// Clear the current insertion point. This is useful if the instruction
/// that had been serving as the insertion point may have been deleted.
- void clearInsertPoint() {
- Builder.ClearInsertionPoint();
+ void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+
+ /// Set location information used by debugging information.
+ void SetCurrentDebugLocation(DebugLoc L) {
+ Builder.SetCurrentDebugLocation(std::move(L));
+ }
+
+ /// Get location information used by debugging information.
+ const DebugLoc &getCurrentDebugLocation() const {
+ return Builder.getCurrentDebugLocation();
}
/// Return true if the specified instruction was inserted by the code
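
The two accessors make it easy to preserve source locations across expansion;
a minimal sketch, assuming a SCEV *S is materialized in place of an
instruction OldInst:

    SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "scevexp");
    // Tag everything the expander emits with the location of the code it
    // replaces so the debug info survives the rewrite.
    Expander.SetCurrentDebugLocation(OldInst->getDebugLoc());
    Value *V = Expander.expandCodeFor(S, OldInst->getType(), OldInst);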
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 4b5200f5a838..d4b223863c54 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,11 +30,12 @@ struct VecDesc {
unsigned VectorizationFactor;
};
- enum LibFunc {
+ enum LibFunc : unsigned {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
- NumLibFuncs
+ NumLibFuncs,
+ NotLibFunc
};
/// Implementation of the target library information.
@@ -48,7 +49,7 @@ class TargetLibraryInfoImpl {
unsigned char AvailableArray[(NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static StringRef const StandardNames[NumLibFuncs];
+ static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
@@ -359,7 +360,6 @@ public:
TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
: PresetInfoImpl(std::move(PresetInfoImpl)) {}
- TargetLibraryInfo run(Module &M, ModuleAnalysisManager &);
TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);
private:
@@ -385,8 +385,13 @@ public:
explicit TargetLibraryInfoWrapperPass(const Triple &T);
explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);
- TargetLibraryInfo &getTLI() { return TLI; }
- const TargetLibraryInfo &getTLI() const { return TLI; }
+ TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) {
+ return TLI;
+ }
+ const TargetLibraryInfo &
+ getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const {
+ return TLI;
+ }
};
} // end namespace llvm
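
Callers of the wrapper pass now name the function they query for; a
representative sketch inside a legacy function pass:

    bool runOnFunction(Function &F) override {
      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      LibFunc LF;
      // Per-function TLI stays correct once availability can vary between
      // functions (e.g., with per-function target features).
      bool HasMemcpy = TLI.getLibFunc("memcpy", LF) && TLI.has(LF);
      (void)HasMemcpy; // e.g., feed into a transform decision
      return false;
    }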
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 7574b811bc1c..d6fa88411654 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -368,6 +368,20 @@ public:
/// optimize away.
unsigned getFlatAddressSpace() const;
+ /// Return any intrinsic address operand indexes which may be rewritten if
+ /// they use a flat address space pointer.
+ ///
+ /// \returns true if the intrinsic was handled.
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
+ /// NewV, which has a different address space. This should happen for every
+ /// operand index that collectFlatAddressOperands returned for the intrinsic.
+  /// \returns true if the intrinsic was handled.
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+
/// Test whether calls to a function lower to actual program function
/// calls.
///
@@ -469,12 +483,17 @@ public:
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+ /// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
+    /// Allow peeling based on profile. Used to enable peeling off all
+    /// iterations based on the provided profile.
+    /// If the value is true, the peeling cost model can decide to peel only
+    /// some iterations, and in that case it will set this to false.
+ bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
@@ -555,15 +574,15 @@ public:
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
- /// Return true if the target supports masked load.
- bool isLegalMaskedStore(Type *DataType) const;
/// Return true if the target supports masked store.
- bool isLegalMaskedLoad(Type *DataType) const;
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
+ /// Return true if the target supports masked load.
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;
/// Return true if the target supports nontemporal store.
- bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
@@ -622,12 +641,6 @@ public:
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
- /// Returns the target's jmp_buf alignment in bytes.
- unsigned getJumpBufAlignment() const;
-
- /// Returns the target's jmp_buf size in bytes.
- unsigned getJumpBufSize() const;
-
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
@@ -775,10 +788,23 @@ public:
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
- /// \return The number of scalar or vector registers that the target has.
- /// If 'Vectors' is true, it returns the number of vector registers. If it is
- /// set to false, it returns the number of scalar registers.
- unsigned getNumberOfRegisters(bool Vector) const;
+ /// \return the number of registers in the target-provided register class.
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+
+ /// \return the target-provided register class ID for the provided type,
+  /// accounting for type promotion and other type-legalization techniques
+  /// that the target might apply. However, it specifically does not account
+  /// for the scalarization or splitting of vector types. Should a vector type
+  /// require scalarization or splitting into multiple underlying vector
+  /// registers, that type should be mapped to a register class containing no
+  /// registers. Specifically, this is designed to provide a simple, high-level
+  /// view of the register allocation later performed by the backend. These
+  /// register classes don't necessarily map onto the register classes used by
+  /// the backend.
+ /// FIXME: It's not currently possible to determine how many registers
+ /// are used by the provided type.
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+
+ /// \return the target-provided register class name
+ const char* getRegisterClassName(unsigned ClassID) const;
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
@@ -824,18 +850,20 @@ public:
/// \return The associativity of the cache level, if available.
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
- /// \return How much before a load we should place the prefetch instruction.
- /// This is currently measured in number of instructions.
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
unsigned getPrefetchDistance() const;
- /// \return Some HW prefetchers can handle accesses up to a certain constant
- /// stride. This is the minimum stride in bytes where it makes sense to start
- /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
- /// \return The maximum number of iterations to prefetch ahead. If the
- /// required number of iterations is more than this number, no prefetching is
- /// performed.
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return The maximum interleave factor that any transform should try to
@@ -1155,6 +1183,10 @@ public:
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
+ virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const = 0;
+ virtual bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
@@ -1177,10 +1209,10 @@ public:
TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
- virtual bool isLegalMaskedStore(Type *DataType) = 0;
- virtual bool isLegalMaskedLoad(Type *DataType) = 0;
- virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
- virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
+ virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
@@ -1196,8 +1228,6 @@ public:
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual unsigned getJumpBufAlignment() = 0;
- virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
@@ -1228,19 +1258,35 @@ public:
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
- virtual unsigned getNumberOfRegisters(bool Vector) = 0;
+ virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
+ virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
+ virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
- virtual unsigned getCacheLineSize() = 0;
- virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
- virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
- virtual unsigned getPrefetchDistance() = 0;
- virtual unsigned getMinPrefetchStride() = 0;
- virtual unsigned getMaxPrefetchIterationsAhead() = 0;
+ virtual unsigned getCacheLineSize() const = 0;
+ virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
+ virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
+
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
+ virtual unsigned getPrefetchDistance() const = 0;
+
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
+ virtual unsigned getMinPrefetchStride() const = 0;
+
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
+ virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
+
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -1395,6 +1441,16 @@ public:
return Impl.getFlatAddressSpace();
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const override {
+ return Impl.collectFlatAddressOperands(OpIndexes, IID);
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const override {
+ return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+ }
+
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
@@ -1440,16 +1496,16 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const override {
return Impl.shouldFavorBackedgeIndex(L);
}
- bool isLegalMaskedStore(Type *DataType) override {
- return Impl.isLegalMaskedStore(DataType);
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedStore(DataType, Alignment);
}
- bool isLegalMaskedLoad(Type *DataType) override {
- return Impl.isLegalMaskedLoad(DataType);
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedLoad(DataType, Alignment);
}
- bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType) override {
@@ -1490,8 +1546,6 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
- unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
@@ -1563,8 +1617,14 @@ public:
Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
- unsigned getNumberOfRegisters(bool Vector) override {
- return Impl.getNumberOfRegisters(Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const override {
+ return Impl.getNumberOfRegisters(ClassID);
+ }
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
+ return Impl.getRegisterClassForType(Vector, Ty);
+ }
+ const char* getRegisterClassName(unsigned ClassID) const override {
+ return Impl.getRegisterClassName(ClassID);
}
unsigned getRegisterBitWidth(bool Vector) const override {
return Impl.getRegisterBitWidth(Vector);
@@ -1583,22 +1643,36 @@ public:
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
- unsigned getCacheLineSize() override {
+ unsigned getCacheLineSize() const override {
return Impl.getCacheLineSize();
}
- llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
- llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
- unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
- unsigned getMinPrefetchStride() override {
+
+ /// Return the preferred prefetch distance in terms of instructions.
+ ///
+ unsigned getPrefetchDistance() const override {
+ return Impl.getPrefetchDistance();
+ }
+
+ /// Return the minimum stride necessary to trigger software
+ /// prefetching.
+ ///
+ unsigned getMinPrefetchStride() const override {
return Impl.getMinPrefetchStride();
}
- unsigned getMaxPrefetchIterationsAhead() override {
+
+ /// Return the maximum prefetch distance in terms of loop
+ /// iterations.
+ ///
+ unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
+
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
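
A register-pressure sketch against the new class-based API, assuming a vector
type VecTy and a TargetTransformInfo TTI:

    unsigned ClassID = TTI.getRegisterClassForType(/*Vector=*/true, VecTy);
    unsigned Budget = TTI.getNumberOfRegisters(ClassID);
    LLVM_DEBUG(dbgs() << "budget for " << TTI.getRegisterClassName(ClassID)
                      << ": " << Budget << " registers\n");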
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index b99e1eb9adf0..a431fa0d458b 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -156,6 +156,16 @@ public:
return -1;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ return false;
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const {
+ return false;
+ }
+
bool isLoweredToCall(const Function *F) {
assert(F && "A concrete function must be provided to this routine.");
@@ -233,18 +243,18 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
- bool isLegalMaskedStore(Type *DataType) { return false; }
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalMaskedLoad(Type *DataType) { return false; }
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+ bool isLegalNTStore(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory stores are available for stores
// that are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory loads are available for loads that
// are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
@@ -284,10 +294,6 @@ public:
bool isTypeLegal(Type *Ty) { return false; }
- unsigned getJumpBufAlignment() { return 0; }
-
- unsigned getJumpBufSize() { return 0; }
-
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
@@ -348,7 +354,20 @@ public:
return TTI::TCC_Free;
}
- unsigned getNumberOfRegisters(bool Vector) { return 8; }
+ unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
+
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
+ return Vector ? 1 : 0;
+  }
+
+ const char* getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ default:
+ return "Generic::Unknown Register Class";
+ case 0: return "Generic::ScalarRC";
+ case 1: return "Generic::VectorRC";
+ }
+ }
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
@@ -365,21 +384,20 @@ public:
return false;
}
- unsigned getCacheLineSize() { return 0; }
+ unsigned getCacheLineSize() const { return 0; }
- llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
+ llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
case TargetTransformInfo::CacheLevel::L2D:
return llvm::Optional<unsigned>();
}
-
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
llvm::Optional<unsigned> getCacheAssociativity(
- TargetTransformInfo::CacheLevel Level) {
+ TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
@@ -390,11 +408,9 @@ public:
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
- unsigned getPrefetchDistance() { return 0; }
-
- unsigned getMinPrefetchStride() { return 1; }
-
- unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+ unsigned getPrefetchDistance() const { return 0; }
+ unsigned getMinPrefetchStride() const { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
@@ -830,6 +846,9 @@ public:
if (isa<PHINode>(U))
return TTI::TCC_Free; // Model all PHI nodes as free.
+ if (isa<ExtractValueInst>(U))
+ return TTI::TCC_Free; // Model all ExtractValue nodes as free.
+
// Static alloca doesn't generate target instructions.
if (auto *A = dyn_cast<AllocaInst>(U))
if (A->isStaticAlloca())
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index 82cf8efeea54..43ce26147c2e 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT);
+
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
}
#endif
diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h
index acbdf5dca32c..a63bcec9bc41 100644
--- a/include/llvm/Analysis/Utils/Local.h
+++ b/include/llvm/Analysis/Utils/Local.h
@@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
Value *Result = Constant::getNullValue(IntPtrTy);
// If the GEP is inbounds, we know that none of the addressing operations will
- // overflow in an unsigned sense.
+ // overflow in a signed sense.
bool isInBounds = GEPOp->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
@@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
- if (OpC->getType()->isVectorTy())
- OpC = OpC->getSplatValue();
-
- uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue();
+ uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
if (Size)
@@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
continue;
}
+ // Splat the constant if needed.
+ if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy())
+ OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC);
+
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+ Scale =
+ ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
}
+
+ // Splat the index if needed.
+ if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op);
+
// Convert to correct type.
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
// We'll let instcombine(mul) convert this to a shl if possible.
Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".idx", isInBounds /*NUW*/);
+ GEP->getName() + ".idx", false /*NUW*/,
+ isInBounds /*NSW*/);
}
// Emit an add instruction.
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index fa7e0e0eef7e..33b064fcf9d2 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -242,19 +242,21 @@ class Value;
/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
/// creates and later unpacks the required APInt.
inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Value *Base =
- Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
- /* AllowNonInbounds */ true);
+ Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds);
+
Offset = OffsetAPInt.getSExtValue();
return Base;
}
- inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
- int64_t &Offset,
- const DataLayout &DL) {
- return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
- DL);
+ inline const Value *
+ GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
+ return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset, DL,
+ AllowNonInbounds);
}
/// Returns true if the GEP is based on a pointer to a string (array of
@@ -307,20 +309,26 @@ class Value;
uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
/// This function returns call pointer argument that is considered the same by
- /// aliasing rules. You CAN'T use it to replace one value with another.
- const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
- inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+ /// aliasing rules. You CAN'T use it to replace one value with another. If
+ /// \p MustPreserveNullness is true, the call must preserve the nullness of
+ /// the pointer.
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call,
+ bool MustPreserveNullness);
+ inline Value *
+ getArgumentAliasingToReturnedPointer(CallBase *Call,
+ bool MustPreserveNullness) {
return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
- const_cast<const CallBase *>(Call)));
+ const_cast<const CallBase *>(Call), MustPreserveNullness));
}
- // {launder,strip}.invariant.group returns pointer that aliases its argument,
- // and it only captures pointer by returning it.
- // These intrinsics are not marked as nocapture, because returning is
- // considered as capture. The arguments are not marked as returned neither,
- // because it would make it useless.
+ /// {launder,strip}.invariant.group returns pointer that aliases its argument,
+ /// and it only captures pointer by returning it.
+ /// These intrinsics are not marked as nocapture, because returning is
+ /// considered as capture. The arguments are not marked as returned neither,
+ /// because it would make it useless. If \p MustPreserveNullness is true,
+ /// the intrinsic must preserve the nullness of the pointer.
bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- const CallBase *Call);
+ const CallBase *Call, bool MustPreserveNullness);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -376,6 +384,13 @@ class Value;
/// Return true if the only users of this pointer are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// Return true if speculation of the given load must be suppressed to avoid
+ /// ordering or interfering with an active sanitizer. If not suppressed,
+ /// dereferenceability and alignment must be proven separately. Note: This
+ /// is only needed for raw reasoning; if you use the interface below
+ /// (isSafeToSpeculativelyExecute), this is handled internally.
+ bool mustSuppressSpeculation(const LoadInst &LI);
+
/// Return true if the instruction does not have any effects besides
/// calculating the result and does not have undefined behavior.
///
@@ -605,12 +620,12 @@ class Value;
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp = nullptr,
unsigned Depth = 0);
+
inline SelectPatternResult
- matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
- Instruction::CastOps *CastOp = nullptr) {
- Value *L = const_cast<Value*>(LHS);
- Value *R = const_cast<Value*>(RHS);
- auto Result = matchSelectPattern(const_cast<Value*>(V), L, R);
+ matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) {
+ Value *L = const_cast<Value *>(LHS);
+ Value *R = const_cast<Value *>(RHS);
+ auto Result = matchSelectPattern(const_cast<Value *>(V), L, R);
LHS = L;
RHS = R;
return Result;
@@ -654,6 +669,12 @@ class Value;
Optional<bool> isImpliedByDomCondition(const Value *Cond,
const Instruction *ContextI,
const DataLayout &DL);
+
+  /// If Ptr2 is provably equal to Ptr1 plus a constant offset, return that
+  /// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]; with
+  /// 4-byte elements the offset would be -8.
+ Optional<int64_t> isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+ const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
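
A sketch of the new helper, assuming two pointers into the same array and a
hypothetical consumer:

    // With int A[64]: Ptr1 = &A[42] and Ptr2 = &A[40] give Off == -8, i.e.
    // Ptr2 == Ptr1 + Off in bytes.
    if (Optional<int64_t> Off = isPointerOffset(Ptr1, Ptr2, DL))
      mergeAdjacentAccesses(*Off);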
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index d93d2bc4570b..4a61c2bc35c7 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -15,18 +15,129 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
+/// Describes the type of Parameters
+enum class VFParamKind {
+ Vector, // No semantic information.
+ OMP_Linear, // declare simd linear(i)
+ OMP_LinearRef, // declare simd linear(ref(i))
+ OMP_LinearVal, // declare simd linear(val(i))
+ OMP_LinearUVal, // declare simd linear(uval(i))
+ OMP_LinearPos, // declare simd linear(i:c) uniform(c)
+ OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
+ OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
+  OMP_LinearUValPos,  // declare simd linear(uval(i:c)) uniform(c)
+ OMP_Uniform, // declare simd uniform(i)
+ GlobalPredicate, // Global logical predicate that acts on all lanes
+ // of the input and output mask concurrently. For
+ // example, it is implied by the `M` token in the
+ // Vector Function ABI mangled name.
+ Unknown
+};
+
+/// Describes the type of Instruction Set Architecture
+enum class VFISAKind {
+ AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
+ SVE, // AArch64 Scalable Vector Extension
+ SSE, // x86 SSE
+ AVX, // x86 AVX
+ AVX2, // x86 AVX2
+ AVX512, // x86 AVX512
+ Unknown // Unknown ISA
+};
+
+/// Encapsulates information needed to describe a parameter.
+///
+/// The description of the parameter is not linked directly to
+/// OpenMP or any other vector function description. This structure
+/// is extendible to handle other paradigms that describe vector
+/// functions and their parameters.
+struct VFParameter {
+ unsigned ParamPos; // Parameter Position in Scalar Function.
+ VFParamKind ParamKind; // Kind of Parameter.
+ int LinearStepOrPos = 0; // Step or Position of the Parameter.
+  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
+
+ // Comparison operator.
+ bool operator==(const VFParameter &Other) const {
+ return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
+ std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
+ Other.Alignment);
+ }
+};
+
+/// Contains the information about the kind of vectorization
+/// available.
+///
+/// This object is independent of the paradigm used to
+/// represent vector functions. In particular, it is not attached to
+/// any target-specific ABI.
+struct VFShape {
+ unsigned VF; // Vectorization factor.
+  bool IsScalable; // True if the vectorization factor is scalable.
+ VFISAKind ISA; // Instruction Set Architecture.
+  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
+ // Comparison operator.
+ bool operator==(const VFShape &Other) const {
+ return std::tie(VF, IsScalable, ISA, Parameters) ==
+ std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
+ }
+};
+
+/// Holds the VFShape for a specific scalar to vector function mapping.
+struct VFInfo {
+ VFShape Shape; // Classification of the vector function.
+ StringRef ScalarName; // Scalar Function Name.
+  StringRef VectorName; // Vector Function Name associated with this VFInfo.
+
+ // Comparison operator.
+ bool operator==(const VFInfo &Other) const {
+ return std::tie(Shape, ScalarName, VectorName) ==
+           std::tie(Other.Shape, Other.ScalarName, Other.VectorName);
+ }
+};
+
+namespace VFABI {
+/// Function to construct a VFInfo out of a mangled name in the
+/// following format:
+///
+/// <VFABI_name>[(<redirection>)]
+///
+/// where <VFABI_name> is the name of the vector function, mangled according
+/// to the rules described in the Vector Function ABI of the target vector
+/// extension (or <isa> from now on). The <VFABI_name> is in the following
+/// format:
+///
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+///
+/// This method supports the demangling rules for the following <isa>:
+///
+/// * AArch64: https://developer.arm.com/docs/101129/latest
+///
+/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
+/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
+///
+/// \param MangledName input string in the format
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
+Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);
+
+/// Retrieve the `VFParamKind` from a string token.
+VFParamKind getVFParamKindFromString(const StringRef Token);
+} // end namespace VFABI
+
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
+class TargetLibraryInfo;
class TargetTransformInfo;
class Type;
class Value;
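A worked example of the demangler may be useful here (a sketch, under the assumption that the AArch64 AdvSIMD mapping "_ZGVnN2v_sin" resolves as the rules above describe: <isa> 'n' = AdvSIMD, <mask> 'N' = unmasked, <vlen> = 2, a single 'v' vector parameter, scalar name "sin"):

    if (Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_sin")) {
      const VFShape &Shape = Info->Shape;
      assert(Shape.VF == 2 && !Shape.IsScalable);
      assert(Shape.ISA == VFISAKind::AdvancedSIMD);
      assert(Shape.Parameters.size() == 1 &&
             Shape.Parameters[0].ParamKind == VFParamKind::Vector);
      assert(Info->ScalarName == "sin");
    }
    // Individual <parameters> tokens map onto VFParamKind:
    VFParamKind K = VFABI::getVFParamKindFromString("v"); // VFParamKind::Vector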
@@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask);
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
- InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
- : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
-
- InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
+ InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
+ : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
+ InsertPos(nullptr) {}
+ InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
+ : Alignment(Alignment), InsertPos(Instr) {
Factor = std::abs(Stride);
assert(Factor > 1 && "Invalid interleave factor");
@@ -286,7 +396,7 @@ public:
bool isReverse() const { return Reverse; }
uint32_t getFactor() const { return Factor; }
- uint32_t getAlignment() const { return Align; }
+ uint32_t getAlignment() const { return Alignment.value(); }
uint32_t getNumMembers() const { return Members.size(); }
/// Try to insert a new member \p Instr with index \p Index and
@@ -294,9 +404,7 @@ public:
/// negative if it is the new leader.
///
/// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
+ bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
// Make sure the key fits in an int32_t.
Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
if (!MaybeKey)
@@ -328,7 +436,7 @@ public:
}
// It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
+ Alignment = std::min(Alignment, NewAlign);
Members[Key] = Instr;
return true;
}
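A brief sketch of the updated interface (Leader and Member are hypothetical instructions; Align's invariant subsumes the deleted non-zero asserts):

    // Factor is derived from the stride; the leader defines the insert point.
    InterleaveGroup<Instruction> Group(Leader, /*Stride=*/3, Align(4));
    // insertMember rejects duplicate indices and int32_t key overflow.
    if (!Group.insertMember(Member, /*Index=*/1, Align(4)))
      ; // Member does not belong to this group.
    // The group alignment is the minimum over all inserted members.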
@@ -387,7 +495,7 @@ public:
private:
uint32_t Factor; // Interleave Factor.
bool Reverse;
- uint32_t Align;
+ Align Alignment;
DenseMap<int32_t, InstTy *> Members;
int32_t SmallestKey = 0;
int32_t LargestKey = 0;
@@ -504,8 +612,8 @@ private:
struct StrideDescriptor {
StrideDescriptor() = default;
StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+ Align Alignment)
+ : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
// The access's stride. It is negative for a reverse access.
int64_t Stride = 0;
@@ -517,7 +625,7 @@ private:
uint64_t Size = 0;
// The alignment of this access.
- unsigned Align = 0;
+ Align Alignment;
};
/// A type for holding instructions and their stride descriptors.
@@ -528,11 +636,11 @@ private:
///
/// \returns the newly created interleave group.
InterleaveGroup<Instruction> *
- createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
+ createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
assert(!InterleaveGroupMap.count(Instr) &&
"Already in an interleaved access group");
InterleaveGroupMap[Instr] =
- new InterleaveGroup<Instruction>(Instr, Stride, Align);
+ new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
InterleaveGroups.insert(InterleaveGroupMap[Instr]);
return InterleaveGroupMap[Instr];
}
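The common thread of these hunks: raw unsigned alignments become llvm::Align, whose constructor enforces a non-zero power of two, which is why the scattered "alignment should be non-zero" asserts could be dropped. A minimal illustration (assumed, not from the patch):

    Align A(16);               // OK: a non-zero power of two.
    uint32_t Raw = A.value();  // 16, as getAlignment() above now returns.
    // Align B(0);             // Would assert at construction time.
    A = std::min(A, Align(8)); // Taking the minimum is always safe.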