Diffstat (limited to 'include/llvm/Analysis')
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h                |   14
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h              |    7
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h              |   21
-rw-r--r--  include/llvm/Analysis/BasicAliasAnalysis.h           |   18
-rw-r--r--  include/llvm/Analysis/BlockFrequencyInfo.h           |   10
-rw-r--r--  include/llvm/Analysis/BlockFrequencyInfoImpl.h       |    7
-rw-r--r--  include/llvm/Analysis/BranchProbabilityInfo.h        |    2
-rw-r--r--  include/llvm/Analysis/CFGPrinter.h                   |    3
-rw-r--r--  include/llvm/Analysis/CGSCCPassManager.h             |    1
-rw-r--r--  include/llvm/Analysis/CallGraph.h                    |    2
-rw-r--r--  include/llvm/Analysis/ConstantFolding.h              |    6
-rw-r--r--  include/llvm/Analysis/DominanceFrontier.h            |    4
-rw-r--r--  include/llvm/Analysis/IndirectCallSiteVisitor.h      |   12
-rw-r--r--  include/llvm/Analysis/InlineCost.h                   |    6
-rw-r--r--  include/llvm/Analysis/InstructionSimplify.h          |   12
-rw-r--r--  include/llvm/Analysis/LazyBlockFrequencyInfo.h       |  104
-rw-r--r--  include/llvm/Analysis/LazyBranchProbabilityInfo.h    |   12
-rw-r--r--  include/llvm/Analysis/LazyCallGraph.h                |  493
-rw-r--r--  include/llvm/Analysis/LazyValueInfo.h                |   15
-rw-r--r--  include/llvm/Analysis/Loads.h                        |   31
-rw-r--r--  include/llvm/Analysis/LoopAccessAnalysis.h           |   37
-rw-r--r--  include/llvm/Analysis/LoopInfo.h                     |   30
-rw-r--r--  include/llvm/Analysis/LoopInfoImpl.h                 |  150
-rw-r--r--  include/llvm/Analysis/MemoryBuiltins.h               |   43
-rw-r--r--  include/llvm/Analysis/MemorySSA.h                    | 1155
-rw-r--r--  include/llvm/Analysis/MemorySSAUpdater.h             |  153
-rw-r--r--  include/llvm/Analysis/ObjectUtils.h                  |   42
-rw-r--r--  include/llvm/Analysis/OptimizationDiagnosticInfo.h   |  150
-rw-r--r--  include/llvm/Analysis/PostDominators.h               |    4
-rw-r--r--  include/llvm/Analysis/ProfileSummaryInfo.h           |   16
-rw-r--r--  include/llvm/Analysis/PtrUseVisitor.h                |    5
-rw-r--r--  include/llvm/Analysis/RegionInfo.h                   |    4
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h              |   71
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h   |   66
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionNormalization.h |   45
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.def          |    2
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.h            |   78
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h          |  161
-rw-r--r--  include/llvm/Analysis/TargetTransformInfoImpl.h      |   33
-rw-r--r--  include/llvm/Analysis/TypeMetadataUtils.h            |    5
-rw-r--r--  include/llvm/Analysis/ValueTracking.h                |   32
-rw-r--r--  include/llvm/Analysis/VectorUtils.h                  |   59
42 files changed, 2408 insertions(+), 713 deletions(-)
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index d8e50438e722..1b8b9751faa1 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -443,11 +443,7 @@ public:
/// getModRefInfo (for fences) - Return information about whether
/// a particular store modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
- // Conservatively correct. (We could possibly be a bit smarter if
- // Loc is a alloca that doesn't escape.)
- return MRI_ModRef;
- }
+ ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc);
/// getModRefInfo (for fences) - A convenience wrapper.
ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) {
@@ -528,6 +524,14 @@ public:
/// Check whether or not an instruction may read or write the specified
/// memory location.
///
+ /// Note explicitly that getModRefInfo considers the effects of reading and
+ /// writing the memory location, and not the effect of ordering relative to
+ /// other instructions. Thus, a volatile load is considered to be Ref,
+ /// because it does not actually write memory; it just can't be reordered
+ /// relative to other volatiles (or removed). Atomic ordered loads/stores are
+ /// currently considered ModRef because, conservatively, the visible effect
+ /// appears as if memory was written, not just an ordering constraint.
+ ///
/// An instruction that doesn't read or write memory may be trivially LICM'd
/// for example.
///
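A minimal sketch of what this distinction means for a client, assuming an AAResults &AA, a MemoryLocation Loc, and an Instruction &I are already in scope (the names are illustrative, not from this patch):

  // A volatile load only reports Ref: it does not write Loc, it merely
  // can't be reordered or removed. An ordered atomic load reports
  // ModRef and must be treated as if it could write memory.
  if (auto *LI = dyn_cast<LoadInst>(&I)) {
    ModRefInfo MRI = AA.getModRefInfo(LI, Loc);
    if (MRI == MRI_Ref) {
      // The instruction does not modify Loc, even though the access
      // itself may still be volatile and thus unmovable.
    }
  }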
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 5d11b22c6eed..eac97501c759 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -121,7 +121,10 @@ class AliasSet : public ilist_node<AliasSet> {
AliasSet *Forward;
/// All instructions without a specific address in this alias set.
- std::vector<AssertingVH<Instruction> > UnknownInsts;
+ /// In rare cases this vector can contain null'ed out WeakVH
+ /// instances (this can happen if some other loop pass deletes an
+ /// instruction in this list).
+ std::vector<WeakVH> UnknownInsts;
/// Number of nodes pointing to this AliasSet plus the number of AliasSets
/// forwarding to it.
@@ -171,7 +174,7 @@ class AliasSet : public ilist_node<AliasSet> {
Instruction *getUnknownInst(unsigned i) const {
assert(i < UnknownInsts.size());
- return UnknownInsts[i];
+ return cast_or_null<Instruction>(UnknownInsts[i]);
}
public:
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 79287ed76f2e..f833f417c7dd 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -31,11 +31,10 @@ namespace llvm {
/// \brief A cache of @llvm.assume calls within a function.
///
/// This cache provides fast lookup of assumptions within a function by caching
-/// them and amortizing the cost of scanning for them across all queries. The
-/// cache is also conservatively self-updating so that it will never return
-/// incorrect results about a function even as the function is being mutated.
-/// However, flushing the cache and rebuilding it (or explicitly updating it)
-/// may allow it to discover new assumptions.
+/// them and amortizing the cost of scanning for them across all queries. Passes
+/// that create new assumptions are required to call registerAssumption() to
+/// register any new @llvm.assume calls that they create. Deletions of
+/// @llvm.assume calls do not require special handling.
class AssumptionCache {
/// \brief The function for which this cache is handling assumptions.
///
@@ -87,6 +86,13 @@ public:
/// its instructions.
AssumptionCache(Function &F) : F(F), Scanned(false) {}
+ /// This cache is designed to be self-updating and so it should never be
+ /// invalidated.
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
+
/// \brief Add an @llvm.assume intrinsic to this function's cache.
///
/// The call passed in must be an instruction within this function and must
@@ -196,7 +202,10 @@ public:
AssumptionCacheTracker();
~AssumptionCacheTracker() override;
- void releaseMemory() override { AssumptionCaches.shrink_and_clear(); }
+ void releaseMemory() override {
+ verifyAnalysis();
+ AssumptionCaches.shrink_and_clear();
+ }
void verifyAnalysis() const override;
bool doFinalization(Module &) override {
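To illustrate the registration requirement above, a hypothetical transform that materializes a new assumption might look like this sketch (AC is the function's AssumptionCache, Builder a positioned IRBuilder<>, and Ptr an arbitrary pointer Value*):

  // Create a new @llvm.assume call and hand it to the cache; without
  // the registerAssumption() call, later cached queries could miss it.
  Value *Cond =
      Builder.CreateICmpNE(Ptr, Constant::getNullValue(Ptr->getType()));
  CallInst *Assume = Builder.CreateAssumption(Cond);
  AC.registerAssumption(Assume);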
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index addfffa01061..14e4bded264a 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -233,6 +233,24 @@ FunctionPass *createBasicAAWrapperPass();
/// populated to the best of our ability for a particular function when inside
/// of a \c ModulePass or a \c CallGraphSCCPass.
BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F);
+
+/// This class is a functor to be used in legacy module or SCC passes for
+/// computing AA results for a function. We store the results in fields so that
+/// they live long enough to be queried, but we re-use them each time.
+class LegacyAARGetter {
+ Pass &P;
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+
+public:
+ LegacyAARGetter(Pass &P) : P(P) {}
+ AAResults &operator()(Function &F) {
+ BAR.emplace(createLegacyPMBasicAAResult(P, F));
+ AAR.emplace(createLegacyPMAAResults(P, F, *BAR));
+ return *AAR;
+ }
+};
+
}
#endif
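A short usage sketch for the functor above, as it might appear in a legacy module pass (the loop body is illustrative):

  // Build AA results lazily, once per function; each call re-uses the
  // functor's internal storage, so only the most recent result is
  // valid at any point.
  LegacyAARGetter AARGetter(*this);
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;
    AAResults &AA = AARGetter(F);
    // ... query AA about instructions in F before moving on ...
  }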
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 562041d11fa1..cbae01c9102f 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -45,6 +45,10 @@ public:
~BlockFrequencyInfo();
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
const Function *getFunction() const;
const BranchProbabilityInfo *getBPI() const;
void view() const;
@@ -69,6 +73,12 @@ public:
// Set the frequency of the given basic block.
void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
+ /// Set the frequency of \p ReferenceBB to \p Freq and scale the frequencies
+ /// of the blocks in \p BlocksToScale such that their frequencies relative
+ /// to \p ReferenceBB remain unchanged.
+ void setBlockFreqAndScale(const BasicBlock *ReferenceBB, uint64_t Freq,
+ SmallPtrSetImpl<BasicBlock *> &BlocksToScale);
+
/// calculate - compute block frequency info for the given function.
void calculate(const Function &F, const BranchProbabilityInfo &BPI,
const LoopInfo &LI);
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 3f4428d18740..e3d81fea49ea 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1291,11 +1291,14 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
}
std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
- GVDAGType GType) {
+ GVDAGType GType, int layout_order = -1) {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << " : ";
+ if (layout_order != -1)
+ OS << Node->getName() << "[" << layout_order << "] : ";
+ else
+ OS << Node->getName() << " : ";
switch (GType) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 14b7a7f529f7..6a876679543d 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -164,6 +164,8 @@ private:
/// \brief Track the set of blocks that always lead to a cold call.
SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
+ void updatePostDominatedByUnreachable(const BasicBlock *BB);
+ void updatePostDominatedByColdCall(const BasicBlock *BB);
bool calcUnreachableHeuristics(const BasicBlock *BB);
bool calcMetadataWeights(const BasicBlock *BB);
bool calcColdCallHeuristics(const BasicBlock *BB);
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index efaa9d6df8ea..5786769cc500 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -140,8 +140,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
std::string Str;
raw_string_ostream OS(Str);
- SwitchInst::ConstCaseIt Case =
- SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo);
+ auto Case = *SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo);
OS << Case.getCaseValue()->getValue();
return OS.str();
}
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 6fbe532112b2..398bbfb0c413 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -334,6 +334,7 @@ public:
InvalidSCCSet, nullptr, nullptr};
PreservedAnalyses PA = PreservedAnalyses::all();
+ CG.buildRefSCCs();
for (auto RCI = CG.postorder_ref_scc_begin(),
RCE = CG.postorder_ref_scc_end();
RCI != RCE;) {
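The new buildRefSCCs() call reflects that RefSCC formation is no longer triggered implicitly by the iterator (see the LazyCallGraph changes below). A caller outside the pass manager would follow the same pattern, sketched here:

  // Form the RefSCC DAG once, then walk it in post-order. Iterating
  // without the explicit build step now asserts.
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC :
       make_range(CG.postorder_ref_scc_begin(), CG.postorder_ref_scc_end())) {
    // ... process each RefSCC ...
  }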
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 4ecbaa75ac75..ea85436ee580 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -272,7 +272,7 @@ public:
private:
friend class CallGraph;
- AssertingVH<Function> F;
+ Function *F;
std::vector<CallRecord> CalledFunctions;
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 517842c8b0dc..ff6ca1959153 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -100,6 +100,12 @@ Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
/// successful; if not, null is returned.
Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
+/// \brief Attempt to constant fold a shufflevector instruction with the
+/// specified operands and indices. The constant result is returned if
+/// successful; if not, null is returned.
+Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
+ Constant *Mask);
+
/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
/// produce if it is constant and determinable. If this is not determinable,
/// return null.
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index b9667f801ed3..8cae63c3c869 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -141,6 +141,10 @@ public:
typedef DominanceFrontierBase<BasicBlock>::DomSetType DomSetType;
typedef DominanceFrontierBase<BasicBlock>::iterator iterator;
typedef DominanceFrontierBase<BasicBlock>::const_iterator const_iterator;
+
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
};
class DominanceFrontierWrapperPass : public FunctionPass {
diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h
index 71a8cb886321..3c40cc0235cc 100644
--- a/include/llvm/Analysis/IndirectCallSiteVisitor.h
+++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h
@@ -21,16 +21,8 @@ struct PGOIndirectCallSiteVisitor
PGOIndirectCallSiteVisitor() {}
void visitCallSite(CallSite CS) {
- if (CS.getCalledFunction() || !CS.getCalledValue())
- return;
- Instruction *I = CS.getInstruction();
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->isInlineAsm())
- return;
- }
- if (isa<Constant>(CS.getCalledValue()))
- return;
- IndirectCallInsts.push_back(I);
+ if (CS.isIndirectCall())
+ IndirectCallInsts.push_back(CS.getInstruction());
}
};
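A minimal sketch of driving the visitor over a function (InstVisitor supplies the visit() entry point; IndirectCallInsts is the visitor's result vector, not shown in this hunk):

  // Collect every indirect call site in F; CS.isIndirectCall() now
  // centralizes the "not direct, not inline asm, not constant" checks
  // that were previously open-coded above.
  PGOIndirectCallSiteVisitor ICV;
  ICV.visit(F);
  for (Instruction *I : ICV.IndirectCallInsts) {
    // ... each I calls through a non-constant function pointer ...
  }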
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 5e7b00261f63..17e5cb6db02d 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -21,6 +21,7 @@
namespace llvm {
class AssumptionCacheTracker;
+class BlockFrequencyInfo;
class CallSite;
class DataLayout;
class Function;
@@ -137,6 +138,9 @@ struct InlineParams {
/// Threshold to use when the callsite is considered hot.
Optional<int> HotCallSiteThreshold;
+
+ /// Threshold to use when the callsite is considered cold.
+ Optional<int> ColdCallSiteThreshold;
};
/// Generate the parameters to tune the inline cost analysis based only on the
@@ -171,6 +175,7 @@ InlineCost
getInlineCost(CallSite CS, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
/// \brief Get an InlineCost with the callee explicitly specified.
@@ -182,6 +187,7 @@ InlineCost
getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
/// \brief Minimal filter to detect invalid constructs for inlining.
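Call sites of getInlineCost pick up the new parameter; a caller without block frequency information simply passes None. A sketch, with CS, Params, CalleeTTI, GetAssumptionCache, and PSI as in the declaration and tryToInline a hypothetical follow-up:

  // GetBFI is optional: pass None when no BFI is available, or wrap a
  // callable returning the caller's BlockFrequencyInfo for a function.
  InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
                                /*GetBFI=*/None, PSI);
  if (IC)
    tryToInline(CS);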
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 47d6118313cb..b829e995db05 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -42,6 +42,7 @@ namespace llvm {
class Instruction;
class DataLayout;
class FastMathFlags;
+ class OptimizationRemarkEmitter;
class TargetLibraryInfo;
class Type;
class Value;
@@ -246,6 +247,14 @@ namespace llvm {
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr);
+ /// Given operands for a ShuffleVectorInst, fold the result or return null.
+ Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr);
+
//=== Helper functions for higher up the class hierarchy.
@@ -296,7 +305,8 @@ namespace llvm {
Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr,
const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
+ AssumptionCache *AC = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
///
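A sketch of folding through the new shufflevector entry point, assuming an existing ShuffleVectorInst *SVI and the module's DataLayout DL:

  // Try to fold the shuffle; the optional TLI/DT/AC/CxtI analyses are
  // omitted here and default to nullptr.
  if (Value *V = SimplifyShuffleVectorInst(SVI->getOperand(0),
                                           SVI->getOperand(1), SVI->getMask(),
                                           SVI->getType(), DL)) {
    SVI->replaceAllUsesWith(V);
    SVI->eraseFromParent();
  }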
diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index 5a02b9dce463..71ce0842f6a9 100644
--- a/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -9,7 +9,7 @@
//
// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The
// difference is that with this pass the block frequencies are not computed when
-// the analysis pass is executed but rather when the BFI results is explicitly
+// the analysis pass is executed but rather when the BFI result is explicitly
// requested by the analysis client.
//
//===----------------------------------------------------------------------===//
@@ -27,10 +27,58 @@ class BranchProbabilityInfo;
class Function;
class LoopInfo;
+/// Wraps a BFI to allow lazy computation of the block frequencies.
+///
+/// A pass that only conditionally uses BFI can unconditionally require the
+/// analysis without paying for the overhead if BFI doesn't end up being used.
+template <typename FunctionT, typename BranchProbabilityInfoPassT,
+ typename LoopInfoT, typename BlockFrequencyInfoT>
+class LazyBlockFrequencyInfo {
+public:
+ LazyBlockFrequencyInfo()
+ : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
+
+ /// Set up the per-function input.
+ void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass,
+ const LoopInfoT *LI) {
+ this->F = F;
+ this->BPIPass = BPIPass;
+ this->LI = LI;
+ }
+
+ /// Retrieve the BFI with the block frequencies computed.
+ BlockFrequencyInfoT &getCalculated() {
+ if (!Calculated) {
+ assert(F && BPIPass && LI && "call setAnalysis");
+ BFI.calculate(
+ *F, BPIPassTrait<BranchProbabilityInfoPassT>::getBPI(BPIPass), *LI);
+ Calculated = true;
+ }
+ return BFI;
+ }
+
+ const BlockFrequencyInfoT &getCalculated() const {
+ return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated();
+ }
+
+ void releaseMemory() {
+ BFI.releaseMemory();
+ Calculated = false;
+ setAnalysis(nullptr, nullptr, nullptr);
+ }
+
+private:
+ BlockFrequencyInfoT BFI;
+ bool Calculated;
+ const FunctionT *F;
+ BranchProbabilityInfoPassT *BPIPass;
+ const LoopInfoT *LI;
+};
+
/// \brief This is an alternative analysis pass to
/// BlockFrequencyInfoWrapperPass. The difference is that with this pass the
/// block frequencies are not computed when the analysis pass is executed but
-/// rather when the BFI results is explicitly requested by the analysis client.
+/// rather when the BFI result is explicitly requested by the analysis client.
///
/// There are some additional requirements for any client pass that wants to use
/// the analysis:
@@ -49,54 +97,12 @@ class LoopInfo;
///
/// Note that it is expected that we wouldn't need this functionality for the
/// new PM since with the new PM, analyses are executed on demand.
-class LazyBlockFrequencyInfoPass : public FunctionPass {
-
- /// Wraps a BFI to allow lazy computation of the block frequencies.
- ///
- /// A pass that only conditionally uses BFI can uncondtionally require the
- /// analysis without paying for the overhead if BFI doesn't end up being used.
- class LazyBlockFrequencyInfo {
- public:
- LazyBlockFrequencyInfo()
- : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
-
- /// Set up the per-function input.
- void setAnalysis(const Function *F, LazyBranchProbabilityInfoPass *BPIPass,
- const LoopInfo *LI) {
- this->F = F;
- this->BPIPass = BPIPass;
- this->LI = LI;
- }
- /// Retrieve the BFI with the block frequencies computed.
- BlockFrequencyInfo &getCalculated() {
- if (!Calculated) {
- assert(F && BPIPass && LI && "call setAnalysis");
- BFI.calculate(*F, BPIPass->getBPI(), *LI);
- Calculated = true;
- }
- return BFI;
- }
-
- const BlockFrequencyInfo &getCalculated() const {
- return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated();
- }
-
- void releaseMemory() {
- BFI.releaseMemory();
- Calculated = false;
- setAnalysis(nullptr, nullptr, nullptr);
- }
-
- private:
- BlockFrequencyInfo BFI;
- bool Calculated;
- const Function *F;
- LazyBranchProbabilityInfoPass *BPIPass;
- const LoopInfo *LI;
- };
-
- LazyBlockFrequencyInfo LBFI;
+class LazyBlockFrequencyInfoPass : public FunctionPass {
+private:
+ LazyBlockFrequencyInfo<Function, LazyBranchProbabilityInfoPass, LoopInfo,
+ BlockFrequencyInfo>
+ LBFI;
public:
static char ID;
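A sketch of how a client of this pass uses the now-templated wrapper, inside a legacy runOnFunction (assuming the pass exposes the usual getBFI() accessor that forwards to LBFI.getCalculated()):

  // The frequencies are only computed if this branch is reached, so an
  // unconditional analysis dependence costs nothing on the common path.
  if (OptimizeForRareCase) { // hypothetical condition
    BlockFrequencyInfo &BFI =
        getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
    uint64_t EntryFreq =
        BFI.getBlockFreq(&F.getEntryBlock()).getFrequency();
    // ... use EntryFreq ...
  }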
diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index c76fa1e819ae..067d7ebfd1f5 100644
--- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -105,5 +105,17 @@ public:
/// \brief Helper for client passes to initialize dependent passes for LBPI.
void initializeLazyBPIPassPass(PassRegistry &Registry);
+
+/// \brief Simple trait class that provides a mapping between BPI passes and the
+/// corresponding BPInfo.
+template <typename PassT> struct BPIPassTrait {
+ static PassT &getBPI(PassT *P) { return *P; }
+};
+
+template <> struct BPIPassTrait<LazyBranchProbabilityInfoPass> {
+ static BranchProbabilityInfo &getBPI(LazyBranchProbabilityInfoPass *P) {
+ return P->getBPI();
+ }
+};
}
#endif
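A small sketch of what the trait buys: generic code (like the LazyBlockFrequencyInfo template above) can obtain branch probabilities from either kind of provider without special-casing:

  // Generic case: the "pass" is the BPI-like object itself and getBPI
  // just dereferences it. For LazyBranchProbabilityInfoPass, the
  // specialization calls P->getBPI(), computing the result on first use.
  template <typename BPIPassT>
  void useBPI(BPIPassT *P) {
    auto &BPI = BPIPassTrait<BPIPassT>::getBPI(P);
    (void)BPI; // ... query branch probabilities ...
  }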
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index bca0aebe2eef..ad7f5c80549f 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -106,6 +106,7 @@ class raw_ostream;
class LazyCallGraph {
public:
class Node;
+ class EdgeSequence;
class SCC;
class RefSCC;
class edge_iterator;
@@ -121,16 +122,6 @@ public:
/// inherently reference edges, and so the reference graph forms a superset
/// of the formal call graph.
///
- /// Furthermore, edges also may point to raw \c Function objects when those
- /// functions have not been scanned and incorporated into the graph (yet).
- /// This is one of the primary ways in which the graph can be lazy. When
- /// functions are scanned and fully incorporated into the graph, all of the
- /// edges referencing them are updated to point to the graph \c Node objects
- /// instead of to the raw \c Function objects. This class even provides
- /// methods to trigger this scan on-demand by attempting to get the target
- /// node of the graph and providing a reference back to the graph in order to
- /// lazily build it if necessary.
- ///
/// All of these forms of edges are fundamentally represented as outgoing
/// edges. The edges are stored in the source node and point at the target
/// node. This allows the edge structure itself to be a very compact data
@@ -141,7 +132,6 @@ public:
enum Kind : bool { Ref = false, Call = true };
Edge();
- explicit Edge(Function &F, Kind K);
explicit Edge(Node &N, Kind K);
/// Test whether the edge is null.
@@ -158,197 +148,251 @@ public:
/// This requires that the edge is not null.
bool isCall() const;
- /// Get the function referenced by this edge.
- ///
- /// This requires that the edge is not null, but will succeed whether we
- /// have built a graph node for the function yet or not.
- Function &getFunction() const;
-
- /// Get the call graph node referenced by this edge if one exists.
+ /// Get the call graph node referenced by this edge.
///
- /// This requires that the edge is not null. If we have built a graph node
- /// for the function this edge points to, this will return that node,
- /// otherwise it will return null.
- Node *getNode() const;
+ /// This requires that the edge is not null.
+ Node &getNode() const;
- /// Get the call graph node for this edge, building it if necessary.
+ /// Get the function referenced by this edge.
///
- /// This requires that the edge is not null. If we have not yet built
- /// a graph node for the function this edge points to, this will first ask
- /// the graph to build that node, inserting it into all the relevant
- /// structures.
- Node &getNode(LazyCallGraph &G);
+ /// This requires that the edge is not null.
+ Function &getFunction() const;
private:
- friend class LazyCallGraph::Node;
+ friend class LazyCallGraph::EdgeSequence;
friend class LazyCallGraph::RefSCC;
- PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value;
+ PointerIntPair<Node *, 1, Kind> Value;
void setKind(Kind K) { Value.setInt(K); }
};
- typedef SmallVector<Edge, 4> EdgeVectorT;
- typedef SmallVectorImpl<Edge> EdgeVectorImplT;
-
- /// A node in the call graph.
+ /// The edge sequence object.
///
- /// This represents a single node. It's primary roles are to cache the list of
- /// callees, de-duplicate and provide fast testing of whether a function is
- /// a callee, and facilitate iteration of child nodes in the graph.
- class Node {
+ /// This typically exists entirely within the node but is exposed as
+ /// a separate type because a node doesn't initially have edges. An explicit
+ /// population step is required to produce this sequence at first and it is
+ /// then cached in the node. It is also used to represent edges entering the
+ /// graph from outside the module to model the graph's roots.
+ ///
+ /// The sequence itself is both iterable and indexable. The indexes remain
+ /// stable even as the sequence mutates (including removal).
+ class EdgeSequence {
friend class LazyCallGraph;
- friend class LazyCallGraph::SCC;
+ friend class LazyCallGraph::Node;
friend class LazyCallGraph::RefSCC;
- LazyCallGraph *G;
- Function &F;
+ typedef SmallVector<Edge, 4> VectorT;
+ typedef SmallVectorImpl<Edge> VectorImplT;
- // We provide for the DFS numbering and Tarjan walk lowlink numbers to be
- // stored directly within the node. These are both '-1' when nodes are part
- // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
- int DFSNumber;
- int LowLink;
+ public:
+ /// An iterator used for the edges to both entry nodes and child nodes.
+ class iterator
+ : public iterator_adaptor_base<iterator, VectorImplT::iterator,
+ std::forward_iterator_tag> {
+ friend class LazyCallGraph;
+ friend class LazyCallGraph::Node;
+
+ VectorImplT::iterator E;
+
+ // Build the iterator for a specific position in the edge list.
+ iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
+ : iterator_adaptor_base(BaseI), E(E) {
+ while (I != E && !*I)
+ ++I;
+ }
- mutable EdgeVectorT Edges;
- DenseMap<Function *, int> EdgeIndexMap;
+ public:
+ iterator() {}
- /// Basic constructor implements the scanning of F into Edges and
- /// EdgeIndexMap.
- Node(LazyCallGraph &G, Function &F);
+ using iterator_adaptor_base::operator++;
+ iterator &operator++() {
+ do {
+ ++I;
+ } while (I != E && !*I);
+ return *this;
+ }
+ };
- /// Internal helper to insert an edge to a function.
- void insertEdgeInternal(Function &ChildF, Edge::Kind EK);
+ /// An iterator over specifically call edges.
+ ///
+ /// This has the same iteration properties as the \c iterator, but
+ /// restricts itself to edges which represent actual calls.
+ class call_iterator
+ : public iterator_adaptor_base<call_iterator, VectorImplT::iterator,
+ std::forward_iterator_tag> {
+ friend class LazyCallGraph;
+ friend class LazyCallGraph::Node;
+
+ VectorImplT::iterator E;
+
+ /// Advance the iterator to the next valid, call edge.
+ void advanceToNextEdge() {
+ while (I != E && (!*I || !I->isCall()))
+ ++I;
+ }
- /// Internal helper to insert an edge to a node.
- void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
+ // Build the iterator for a specific position in the edge list.
+ call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
+ : iterator_adaptor_base(BaseI), E(E) {
+ advanceToNextEdge();
+ }
- /// Internal helper to change an edge kind.
- void setEdgeKind(Function &ChildF, Edge::Kind EK);
+ public:
+ call_iterator() {}
- /// Internal helper to remove the edge to the given function.
- void removeEdgeInternal(Function &ChildF);
+ using iterator_adaptor_base::operator++;
+ call_iterator &operator++() {
+ ++I;
+ advanceToNextEdge();
+ return *this;
+ }
+ };
- void clear() {
- Edges.clear();
- EdgeIndexMap.clear();
- }
+ iterator begin() { return iterator(Edges.begin(), Edges.end()); }
+ iterator end() { return iterator(Edges.end(), Edges.end()); }
- /// Print the name of this node's function.
- friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
- return OS << N.F.getName();
+ Edge &operator[](int i) { return Edges[i]; }
+ Edge &operator[](Node &N) {
+ assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
+ return Edges[EdgeIndexMap.find(&N)->second];
}
-
- /// Dump the name of this node's function to stderr.
- void dump() const;
-
- public:
- LazyCallGraph &getGraph() const { return *G; }
-
- Function &getFunction() const { return F; }
-
- edge_iterator begin() const {
- return edge_iterator(Edges.begin(), Edges.end());
+ Edge *lookup(Node &N) {
+ auto EI = EdgeIndexMap.find(&N);
+ return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
}
- edge_iterator end() const { return edge_iterator(Edges.end(), Edges.end()); }
- const Edge &operator[](int i) const { return Edges[i]; }
- const Edge &operator[](Function &F) const {
- assert(EdgeIndexMap.find(&F) != EdgeIndexMap.end() && "No such edge!");
- return Edges[EdgeIndexMap.find(&F)->second];
+ call_iterator call_begin() {
+ return call_iterator(Edges.begin(), Edges.end());
}
- const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; }
+ call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); }
- const Edge *lookup(Function &F) const {
- auto EI = EdgeIndexMap.find(&F);
- return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
+ iterator_range<call_iterator> calls() {
+ return make_range(call_begin(), call_end());
}
- call_edge_iterator call_begin() const {
- return call_edge_iterator(Edges.begin(), Edges.end());
- }
- call_edge_iterator call_end() const {
- return call_edge_iterator(Edges.end(), Edges.end());
- }
+ bool empty() {
+ for (auto &E : Edges)
+ if (E)
+ return false;
- iterator_range<call_edge_iterator> calls() const {
- return make_range(call_begin(), call_end());
+ return true;
}
- /// Equality is defined as address equality.
- bool operator==(const Node &N) const { return this == &N; }
- bool operator!=(const Node &N) const { return !operator==(N); }
- };
+ private:
+ VectorT Edges;
+ DenseMap<Node *, int> EdgeIndexMap;
- /// A lazy iterator used for both the entry nodes and child nodes.
- ///
- /// When this iterator is dereferenced, if not yet available, a function will
- /// be scanned for "calls" or uses of functions and its child information
- /// will be constructed. All of these results are accumulated and cached in
- /// the graph.
- class edge_iterator
- : public iterator_adaptor_base<edge_iterator, EdgeVectorImplT::iterator,
- std::forward_iterator_tag> {
- friend class LazyCallGraph;
- friend class LazyCallGraph::Node;
+ EdgeSequence() = default;
- EdgeVectorImplT::iterator E;
+ /// Internal helper to insert an edge to a node.
+ void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
- // Build the iterator for a specific position in the edge list.
- edge_iterator(EdgeVectorImplT::iterator BaseI,
- EdgeVectorImplT::iterator E)
- : iterator_adaptor_base(BaseI), E(E) {
- while (I != E && !*I)
- ++I;
- }
+ /// Internal helper to change an edge kind.
+ void setEdgeKind(Node &ChildN, Edge::Kind EK);
- public:
- edge_iterator() {}
+ /// Internal helper to remove the edge to the given function.
+ bool removeEdgeInternal(Node &ChildN);
- using iterator_adaptor_base::operator++;
- edge_iterator &operator++() {
- do {
- ++I;
- } while (I != E && !*I);
- return *this;
- }
+ /// Internal helper to replace an edge key with a new one.
+ ///
+ /// This should be used when the function for a particular node in the
+ /// graph gets replaced and we are updating all of the edges to that node
+ /// to use the new function as the key.
+ void replaceEdgeKey(Function &OldTarget, Function &NewTarget);
};
- /// A lazy iterator over specifically call edges.
+ /// A node in the call graph.
+ ///
+ /// This represents a single node. Its primary roles are to cache the list of
+ /// callees, de-duplicate and provide fast testing of whether a function is
+ /// a callee, and facilitate iteration of child nodes in the graph.
///
- /// This has the same iteration properties as the \c edge_iterator, but
- /// restricts itself to edges which represent actual calls.
- class call_edge_iterator
- : public iterator_adaptor_base<call_edge_iterator,
- EdgeVectorImplT::iterator,
- std::forward_iterator_tag> {
+ /// The node works much like an optional in order to lazily populate the
+ /// edges of each node. Until populated, there are no edges. Once populated,
+ /// you can access the edges by dereferencing the node or using the `->`
+ /// operator as if the node was an `Optional<EdgeSequence>`.
+ class Node {
friend class LazyCallGraph;
- friend class LazyCallGraph::Node;
+ friend class LazyCallGraph::RefSCC;
- EdgeVectorImplT::iterator E;
+ public:
+ LazyCallGraph &getGraph() const { return *G; }
- /// Advance the iterator to the next valid, call edge.
- void advanceToNextEdge() {
- while (I != E && (!*I || !I->isCall()))
- ++I;
+ Function &getFunction() const { return *F; }
+
+ StringRef getName() const { return F->getName(); }
+
+ /// Equality is defined as address equality.
+ bool operator==(const Node &N) const { return this == &N; }
+ bool operator!=(const Node &N) const { return !operator==(N); }
+
+ /// Tests whether the node has been populated with edges.
+ operator bool() const { return Edges.hasValue(); }
+
+ // We allow accessing the edges by dereferencing or using the arrow
+ // operator, essentially wrapping the internal optional.
+ EdgeSequence &operator*() const {
+ // Rip const off because the node itself isn't changing here.
+ return const_cast<EdgeSequence &>(*Edges);
}
+ EdgeSequence *operator->() const { return &**this; }
- // Build the iterator for a specific position in the edge list.
- call_edge_iterator(EdgeVectorImplT::iterator BaseI,
- EdgeVectorImplT::iterator E)
- : iterator_adaptor_base(BaseI), E(E) {
- advanceToNextEdge();
+ /// Populate the edges of this node if necessary.
+ ///
+ /// The first time this is called it will populate the edges for this node
+ /// in the graph. It does this by scanning the underlying function, so once
+ /// this is done, any changes to that function must be explicitly reflected
+ /// in updates to the graph.
+ ///
+ /// \returns the populated \c EdgeSequence to simplify walking it.
+ ///
+ /// This will not update or re-scan anything if called repeatedly. Instead,
+ /// the edge sequence is cached and returned immediately on subsequent
+ /// calls.
+ EdgeSequence &populate() {
+ if (Edges)
+ return *Edges;
+
+ return populateSlow();
}
- public:
- call_edge_iterator() {}
+ private:
+ LazyCallGraph *G;
+ Function *F;
- using iterator_adaptor_base::operator++;
- call_edge_iterator &operator++() {
- ++I;
- advanceToNextEdge();
- return *this;
+ // We provide for the DFS numbering and Tarjan walk lowlink numbers to be
+ // stored directly within the node. These are both '-1' when nodes are part
+ // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
+ int DFSNumber;
+ int LowLink;
+
+ Optional<EdgeSequence> Edges;
+
+ /// Basic constructor implements the scanning of F into Edges and
+ /// EdgeIndexMap.
+ Node(LazyCallGraph &G, Function &F)
+ : G(&G), F(&F), DFSNumber(0), LowLink(0) {}
+
+ /// Implementation of the scan when populating.
+ EdgeSequence &populateSlow();
+
+ /// Internal helper to directly replace the function with a new one.
+ ///
+ /// This is used to facilitate transformations which need to replace the
+ /// formal Function object but directly move the body and users from one to
+ /// the other.
+ void replaceFunction(Function &NewF);
+
+ void clear() { Edges.reset(); }
+
+ /// Print the name of this node's function.
+ friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
+ return OS << N.F->getName();
}
+
+ /// Dump the name of this node's function to stderr.
+ void dump() const;
};
/// An SCC of the call graph.
@@ -789,19 +833,26 @@ public:
/// already existing edges.
void insertTrivialRefEdge(Node &SourceN, Node &TargetN);
+ /// Directly replace a node's function with a new function.
+ ///
+ /// This should be used when moving the body and users of a function to
+ /// a new formal function object but not otherwise changing the call graph
+ /// structure in any way.
+ ///
+ /// It requires that the old function in the provided node have zero uses
+ /// and the new function must have calls and references to it establishing
+ /// an equivalent graph.
+ void replaceNodeFunction(Node &N, Function &NewF);
+
///@}
};
/// A post-order depth-first RefSCC iterator over the call graph.
///
- /// This iterator triggers the Tarjan DFS-based formation of the RefSCC (and
- /// SCC) DAG for the call graph, walking it lazily in depth-first post-order.
- /// That is, it always visits RefSCCs for the target of a reference edge
- /// prior to visiting the RefSCC for a source of the edge (when they are in
- /// different RefSCCs).
- ///
- /// When forming each RefSCC, the call edges within it are used to form SCCs
- /// within it, so iterating this also controls the lazy formation of SCCs.
+ /// This iterator walks the cached post-order sequence of RefSCCs. However,
+ /// it trades stability for flexibility. It is restricted to a forward
+ /// iterator but will survive mutations which insert new RefSCCs and continue
+ /// to point to the same RefSCC even if it moves in the post-order sequence.
class postorder_ref_scc_iterator
: public iterator_facade_base<postorder_ref_scc_iterator,
std::forward_iterator_tag, RefSCC> {
@@ -825,12 +876,9 @@ public:
/// populating it if necessary.
static RefSCC *getRC(LazyCallGraph &G, int Index) {
if (Index == (int)G.PostOrderRefSCCs.size())
- if (!G.buildNextRefSCCInPostOrder())
- // We're at the end.
- return nullptr;
+ // We're at the end.
+ return nullptr;
- assert(Index < (int)G.PostOrderRefSCCs.size() &&
- "Built the next post-order RefSCC without growing list!");
return G.PostOrderRefSCCs[Index];
}
@@ -859,17 +907,21 @@ public:
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
- edge_iterator begin() {
- return edge_iterator(EntryEdges.begin(), EntryEdges.end());
- }
- edge_iterator end() {
- return edge_iterator(EntryEdges.end(), EntryEdges.end());
- }
+ EdgeSequence::iterator begin() { return EntryEdges.begin(); }
+ EdgeSequence::iterator end() { return EntryEdges.end(); }
+
+ void buildRefSCCs();
postorder_ref_scc_iterator postorder_ref_scc_begin() {
+ if (!EntryEdges.empty())
+ assert(!PostOrderRefSCCs.empty() &&
+ "Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this);
}
postorder_ref_scc_iterator postorder_ref_scc_end() {
+ if (!EntryEdges.empty())
+ assert(!PostOrderRefSCCs.empty() &&
+ "Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this,
postorder_ref_scc_iterator::IsAtEndT());
}
@@ -920,19 +972,19 @@ public:
/// below.
/// Update the call graph after inserting a new edge.
- void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK);
+ void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Update the call graph after inserting a new edge.
- void insertEdge(Function &Caller, Function &Callee, Edge::Kind EK) {
- return insertEdge(get(Caller), Callee, EK);
+ void insertEdge(Function &Source, Function &Target, Edge::Kind EK) {
+ return insertEdge(get(Source), get(Target), EK);
}
/// Update the call graph after deleting an edge.
- void removeEdge(Node &Caller, Function &Callee);
+ void removeEdge(Node &SourceN, Node &TargetN);
/// Update the call graph after deleting an edge.
- void removeEdge(Function &Caller, Function &Callee) {
- return removeEdge(get(Caller), Callee);
+ void removeEdge(Function &Source, Function &Target) {
+ return removeEdge(get(Source), get(Target));
}
///@}
@@ -1013,14 +1065,11 @@ private:
/// Maps function->node for fast lookup.
DenseMap<const Function *, Node *> NodeMap;
- /// The entry nodes to the graph.
+ /// The entry edges into the graph.
///
- /// These nodes are reachable through "external" means. Put another way, they
+ /// These edges are from "external" sources. Put another way, they
/// escape at the module scope.
- EdgeVectorT EntryEdges;
-
- /// Map of the entry nodes in the graph to their indices in \c EntryEdges.
- DenseMap<Function *, int> EntryIndexMap;
+ EdgeSequence EntryEdges;
/// Allocator that holds all the call graph SCCs.
SpecificBumpPtrAllocator<SCC> SCCBPA;
@@ -1045,18 +1094,6 @@ private:
/// These are all of the RefSCCs which have no children.
SmallVector<RefSCC *, 4> LeafRefSCCs;
- /// Stack of nodes in the DFS walk.
- SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
-
- /// Set of entry nodes not-yet-processed into RefSCCs.
- SmallVector<Function *, 4> RefSCCEntryNodes;
-
- /// Stack of nodes the DFS has walked but not yet put into a RefSCC.
- SmallVector<Node *, 4> PendingRefSCCStack;
-
- /// Counter for the next DFS number to assign.
- int NextDFSNumber;
-
/// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
@@ -1078,6 +1115,23 @@ private:
return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
}
+ /// Common logic for building SCCs from a sequence of roots.
+ ///
+ /// This is a very generic implementation of the depth-first walk and SCC
+ /// formation algorithm. It uses a generic sequence of roots and generic
+ /// callbacks for each step. This is designed to be used to implement both
+ /// the RefSCC formation and SCC formation with shared logic.
+ ///
+ /// Currently this is a relatively naive implementation of Tarjan's DFS
+ /// algorithm to form the SCCs.
+ ///
+ /// FIXME: We should consider newer variants such as Nuutila.
+ template <typename RootsT, typename GetBeginT, typename GetEndT,
+ typename GetNodeT, typename FormSCCCallbackT>
+ static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
+ GetEndT &&GetEnd, GetNodeT &&GetNode,
+ FormSCCCallbackT &&FormSCC);
+
/// Build the SCCs for a RefSCC out of a list of nodes.
void buildSCCs(RefSCC &RC, node_stack_range Nodes);
@@ -1098,22 +1152,12 @@ private:
"Index does not point back at RC!");
return IndexIt->second;
}
-
- /// Builds the next node in the post-order RefSCC walk of the call graph and
- /// appends it to the \c PostOrderRefSCCs vector.
- ///
- /// Returns true if a new RefSCC was successfully constructed, and false if
- /// there are no more RefSCCs to build in the graph.
- bool buildNextRefSCCInPostOrder();
};
inline LazyCallGraph::Edge::Edge() : Value() {}
-inline LazyCallGraph::Edge::Edge(Function &F, Kind K) : Value(&F, K) {}
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
-inline LazyCallGraph::Edge::operator bool() const {
- return !Value.getPointer().isNull();
-}
+inline LazyCallGraph::Edge::operator bool() const { return Value.getPointer(); }
inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
assert(*this && "Queried a null edge!");
@@ -1125,51 +1169,32 @@ inline bool LazyCallGraph::Edge::isCall() const {
return getKind() == Call;
}
-inline Function &LazyCallGraph::Edge::getFunction() const {
+inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const {
assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *F = P.dyn_cast<Function *>())
- return *F;
-
- return P.get<Node *>()->getFunction();
+ return *Value.getPointer();
}
-inline LazyCallGraph::Node *LazyCallGraph::Edge::getNode() const {
- assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *N = P.dyn_cast<Node *>())
- return N;
-
- return nullptr;
-}
-
-inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) {
+inline Function &LazyCallGraph::Edge::getFunction() const {
assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *N = P.dyn_cast<Node *>())
- return *N;
-
- Node &N = G.get(*P.get<Function *>());
- Value.setPointer(&N);
- return N;
+ return getNode().getFunction();
}
// Provide GraphTraits specializations for call graphs.
template <> struct GraphTraits<LazyCallGraph::Node *> {
typedef LazyCallGraph::Node *NodeRef;
- typedef LazyCallGraph::edge_iterator ChildIteratorType;
+ typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType;
static NodeRef getEntryNode(NodeRef N) { return N; }
- static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
template <> struct GraphTraits<LazyCallGraph *> {
typedef LazyCallGraph::Node *NodeRef;
- typedef LazyCallGraph::edge_iterator ChildIteratorType;
+ typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType;
static NodeRef getEntryNode(NodeRef N) { return N; }
- static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
/// An analysis pass which computes the call graph for a module.
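With this restructuring, a typical traversal now populates a node explicitly and walks its EdgeSequence. A minimal sketch, where N is a LazyCallGraph::Node & and visitRef/visitCall are illustrative callbacks:

  // First touch: scan the underlying function and cache the edges.
  LazyCallGraph::EdgeSequence &Edges = N.populate();
  for (LazyCallGraph::Edge &E : Edges) {
    // Every edge, call or ref, now points at a real Node (never a bare
    // Function), so no lazy getNode(G) step is needed.
    LazyCallGraph::Node &Target = E.getNode();
    visitRef(Target.getFunction());
  }
  for (LazyCallGraph::Edge &E : Edges.calls())
    visitCall(E.getNode().getFunction()); // only the call edges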
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 610791023a7d..49e088e533dc 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -32,6 +32,7 @@ namespace llvm {
class LazyValueInfo {
friend class LazyValueInfoWrapperPass;
AssumptionCache *AC = nullptr;
+ const DataLayout *DL = nullptr;
class TargetLibraryInfo *TLI = nullptr;
DominatorTree *DT = nullptr;
void *PImpl = nullptr;
@@ -40,16 +41,17 @@ class LazyValueInfo {
public:
~LazyValueInfo();
LazyValueInfo() {}
- LazyValueInfo(AssumptionCache *AC_, TargetLibraryInfo *TLI_,
+ LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_, TargetLibraryInfo *TLI_,
DominatorTree *DT_)
- : AC(AC_), TLI(TLI_), DT(DT_) {}
+ : AC(AC_), DL(DL_), TLI(TLI_), DT(DT_) {}
LazyValueInfo(LazyValueInfo &&Arg)
- : AC(Arg.AC), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) {
+ : AC(Arg.AC), DL(Arg.DL), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) {
Arg.PImpl = nullptr;
}
LazyValueInfo &operator=(LazyValueInfo &&Arg) {
releaseMemory();
AC = Arg.AC;
+ DL = Arg.DL;
TLI = Arg.TLI;
DT = Arg.DT;
PImpl = Arg.PImpl;
@@ -98,8 +100,15 @@ public:
/// Inform the analysis cache that we have erased a block.
void eraseBlock(BasicBlock *BB);
+ /// Print the \LazyValueInfoCache.
+ void printCache(Function &F, raw_ostream &OS);
+
// For old PM pass. Delete once LazyValueInfoWrapperPass is gone.
void releaseMemory();
+
+ /// Handle invalidation events in the new pass manager.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
};
/// \brief Analysis to compute lazy value information.
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index e167f36219d2..a59c1f88e229 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -85,8 +85,37 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
AliasAnalysis *AA = nullptr,
- bool *IsLoadCSE = nullptr);
+ bool *IsLoadCSE = nullptr,
+ unsigned *NumScanedInst = nullptr);
+/// Scan backwards to see if we have the value of the given pointer available
+/// locally within a small number of instructions.
+///
+/// You can use this function to scan across multiple blocks: after you call
+/// this function, if ScanFrom points at the beginning of the block, it's safe
+/// to continue scanning the predecessors.
+///
+/// \param Ptr The pointer we want the load and store to originate from.
+/// \param AccessTy The access type of the pointer.
+/// \param AtLeastAtomic Are we looking for an at-least-atomic load/store? If
+/// false, we can return an atomic or non-atomic load or store; if true, we
+/// must return an atomic load or store.
+/// \param ScanBB The basic block to scan.
+/// \param [in,out] ScanFrom The location to start scanning from. When this
+/// function returns, it points at the last instruction scanned.
+/// \param MaxInstsToScan The maximum number of instructions to scan. If this
+/// is zero, the whole block will be scanned.
+/// \param AA Optional pointer to alias analysis, to make the scan more
+/// precise.
+/// \param [out] IsLoad Whether the returned value is a load from the same
+/// location in memory, as opposed to the value operand of a store.
+///
+/// \returns The found value, or nullptr if no value is found.
+Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
+ BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan, AliasAnalysis *AA,
+ bool *IsLoad, unsigned *NumScanedInst);
}
#endif
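A sketch of calling the new entry point for a load, where Load is a LoadInst * and DefMaxInstsToScan is the cl::opt declared earlier in this header:

  // Scan backwards from the load within its own block; NumScanned lets
  // the caller resume across blocks against an instruction budget.
  BasicBlock::iterator ScanFrom = Load->getIterator();
  unsigned NumScanned = 0;
  Value *Avail = FindAvailablePtrLoadStore(
      Load->getPointerOperand(), Load->getType(), /*AtLeastAtomic=*/false,
      Load->getParent(), ScanFrom, DefMaxInstsToScan, /*AA=*/nullptr,
      /*IsLoad=*/nullptr, &NumScanned);
  if (Avail) {
    // ... reuse Avail instead of re-loading ...
  }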
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 901b193c7e2d..2568903c57f3 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -38,39 +38,6 @@ class SCEVUnionPredicate;
class LoopAccessInfo;
class OptimizationRemarkEmitter;
-/// Optimization analysis message produced during vectorization. Messages inform
-/// the user why vectorization did not occur.
-class LoopAccessReport {
- std::string Message;
- const Instruction *Instr;
-
-protected:
- LoopAccessReport(const Twine &Message, const Instruction *I)
- : Message(Message.str()), Instr(I) {}
-
-public:
- LoopAccessReport(const Instruction *I = nullptr) : Instr(I) {}
-
- template <typename A> LoopAccessReport &operator<<(const A &Value) {
- raw_string_ostream Out(Message);
- Out << Value;
- return *this;
- }
-
- const Instruction *getInstr() const { return Instr; }
-
- std::string &str() { return Message; }
- const std::string &str() const { return Message; }
- operator Twine() { return Message; }
-
- /// \brief Emit an analysis note for \p PassName with the debug location from
- /// the instruction in \p Message if available. Otherwise use the location of
- /// \p TheLoop.
- static void emitAnalysis(const LoopAccessReport &Message, const Loop *TheLoop,
- const char *PassName,
- OptimizationRemarkEmitter &ORE);
-};
-
/// \brief Collection of parameters shared beetween the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
@@ -126,7 +93,7 @@ struct VectorizerParams {
class MemoryDepChecker {
public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+ typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
@@ -221,7 +188,7 @@ public:
/// \brief Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps,
+ bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides);
/// \brief No memory dependence was encountered that would inhibit
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 20e6af2727fe..996794b660a9 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -26,7 +26,7 @@
// * etc...
//
// Note that this analysis specifically identifies *Loops* not cycles or SCCs
-// in the CFG. There can be strongly connected compontents in the CFG which
+// in the CFG. There can be strongly connected components in the CFG which
// this analysis will not recognize and that will not be represented by a Loop
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
@@ -364,7 +364,7 @@ extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
-/// in the CFG are neccessarily loops.
+/// in the CFG are necessarily loops.
class Loop : public LoopBase<BasicBlock, Loop> {
public:
/// \brief A range representing the start and end location of a loop.
@@ -469,7 +469,7 @@ public:
/// the loop that branches to the loop header.
///
/// The LoopID metadata node should have one or more operands and the first
- /// operand should should be the node itself.
+ /// operand should be the node itself.
void setLoopID(MDNode *LoopID) const;
/// Return true if no exit block for the loop has a predecessor that is
@@ -478,7 +478,8 @@ public:
/// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
- /// This assumes that loop exits are in canonical form.
+ /// This assumes that loop exits are in canonical form, i.e. all exits are
+ /// dedicated exits.
void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
/// If getUniqueExitBlocks would return exactly one block, return that block.
@@ -570,6 +571,23 @@ public:
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in forward program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ SmallVector<LoopT *, 4> getLoopsInPreorder();
+
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in *reverse* program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ ///
+ /// Also note that this is *not* a reverse preorder. Only the siblings are in
+ /// reverse program order.
+ SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
+
/// Return the inner most loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
@@ -682,6 +700,10 @@ public:
return *this;
}
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
// Most of the public interface is provided via LoopInfoBase.
/// Update LoopInfo after removing the last backedge from a loop. This updates
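A sketch of the intended use of the new preorder traversal, where LI is a LoopInfo & (the output is illustrative):

  // Outermost loops first, siblings in forward program order; the
  // returned vector is a snapshot, safe to hold while mutating loops.
  for (Loop *L : LI.getLoopsInPreorder())
    dbgs() << "loop at depth " << L->getLoopDepth() << ": "
           << L->getHeader()->getName() << "\n";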
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 833a2202a568..761f8721b54f 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -507,6 +507,55 @@ analyze(const DominatorTreeBase<BlockT> &DomTree) {
DFS.traverse(DomRoot->getBlock());
}
+template <class BlockT, class LoopT>
+SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
+ // The outer-most loop actually goes into the result in the same relative
+ // order as we walk it. But LoopInfo stores the top level loops in reverse
+ // program order so for here we reverse it to get forward program order.
+ // FIXME: If we change the order of LoopInfo we will want to remove the
+ // reverse here.
+ for (LoopT *RootL : reverse(*this)) {
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but will process the
+ // worklist backwards so append them in reverse order.
+ PreOrderWorklist.append(L->rbegin(), L->rend());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+ }
+
+ return PreOrderLoops;
+}
+
+template <class BlockT, class LoopT>
+SmallVector<LoopT *, 4>
+LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
+ // The outer-most loop actually goes into the result in the same relative
+ // order as we walk it. LoopInfo stores the top level loops in reverse
+ // program order, so we walk in the stored order here.
+ // FIXME: If we change the order of LoopInfo we will want to add a reverse
+ // here.
+ for (LoopT *RootL : *this) {
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but we will process the
+ // worklist backwards, so we can just append them in order.
+ PreOrderWorklist.append(L->begin(), L->end());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+ }
+
+ return PreOrderLoops;
+}
+
// Debugging
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
@@ -528,15 +577,48 @@ bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) {
}
template <class BlockT, class LoopT>
-static void
-addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders,
- const LoopInfoBase<BlockT, LoopT> &LI,
- const LoopT &L) {
+void addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders,
+ const LoopInfoBase<BlockT, LoopT> &LI,
+ const LoopT &L) {
LoopHeaders[L.getHeader()] = &L;
for (LoopT *SL : L)
addInnerLoopsToHeadersMap(LoopHeaders, LI, *SL);
}
+#ifndef NDEBUG
+template <class BlockT, class LoopT>
+static void compareLoops(const LoopT *L, const LoopT *OtherL,
+ DenseMap<BlockT *, const LoopT *> &OtherLoopHeaders) {
+ BlockT *H = L->getHeader();
+ BlockT *OtherH = OtherL->getHeader();
+ assert(H == OtherH &&
+ "Mismatched headers even though found in the same map entry!");
+
+ assert(L->getLoopDepth() == OtherL->getLoopDepth() &&
+ "Mismatched loop depth!");
+ const LoopT *ParentL = L, *OtherParentL = OtherL;
+ do {
+ assert(ParentL->getHeader() == OtherParentL->getHeader() &&
+ "Mismatched parent loop headers!");
+ ParentL = ParentL->getParentLoop();
+ OtherParentL = OtherParentL->getParentLoop();
+ } while (ParentL);
+
+ for (const LoopT *SubL : *L) {
+ BlockT *SubH = SubL->getHeader();
+ const LoopT *OtherSubL = OtherLoopHeaders.lookup(SubH);
+ assert(OtherSubL && "Inner loop is missing in computed loop info!");
+ OtherLoopHeaders.erase(SubH);
+ compareLoops(SubL, OtherSubL, OtherLoopHeaders);
+ }
+
+ std::vector<BlockT *> BBs = L->getBlocks();
+ std::vector<BlockT *> OtherBBs = OtherL->getBlocks();
+ assert(compareVectors(BBs, OtherBBs) &&
+ "Mismatched basic blocks in the loops!");
+}
+#endif
+
template <class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::verify(
const DominatorTreeBase<BlockT> &DomTree) const {
@@ -559,42 +641,32 @@ void LoopInfoBase<BlockT, LoopT>::verify(
LoopInfoBase<BlockT, LoopT> OtherLI;
OtherLI.analyze(DomTree);
- DenseMap<BlockT *, const LoopT *> LoopHeaders1;
- DenseMap<BlockT *, const LoopT *> LoopHeaders2;
-
- for (LoopT *L : *this)
- addInnerLoopsToHeadersMap(LoopHeaders1, *this, *L);
+ // Build a map we can use to move from our LI to the computed one. This
+ // allows us to ignore the particular order in any layer of the loop forest
+ // while still comparing the structure.
+ DenseMap<BlockT *, const LoopT *> OtherLoopHeaders;
for (LoopT *L : OtherLI)
- addInnerLoopsToHeadersMap(LoopHeaders2, OtherLI, *L);
- assert(LoopHeaders1.size() == LoopHeaders2.size() &&
- "LoopInfo is incorrect.");
-
- auto compareLoops = [&](const LoopT *L1, const LoopT *L2) {
- BlockT *H1 = L1->getHeader();
- BlockT *H2 = L2->getHeader();
- if (H1 != H2)
- return false;
- std::vector<BlockT *> BB1 = L1->getBlocks();
- std::vector<BlockT *> BB2 = L2->getBlocks();
- if (!compareVectors(BB1, BB2))
- return false;
-
- std::vector<BlockT *> SubLoopHeaders1;
- std::vector<BlockT *> SubLoopHeaders2;
- for (LoopT *L : *L1)
- SubLoopHeaders1.push_back(L->getHeader());
- for (LoopT *L : *L2)
- SubLoopHeaders2.push_back(L->getHeader());
-
- if (!compareVectors(SubLoopHeaders1, SubLoopHeaders2))
- return false;
- return true;
- };
-
- for (auto &I : LoopHeaders1) {
- BlockT *H = I.first;
- bool LoopsMatch = compareLoops(LoopHeaders1[H], LoopHeaders2[H]);
- assert(LoopsMatch && "LoopInfo is incorrect.");
+ addInnerLoopsToHeadersMap(OtherLoopHeaders, OtherLI, *L);
+
+ // Walk the top level loops and ensure there is a corresponding top-level
+ // loop in the computed version and then recursively compare those loop
+ // nests.
+ for (LoopT *L : *this) {
+ BlockT *Header = L->getHeader();
+ const LoopT *OtherL = OtherLoopHeaders.lookup(Header);
+ assert(OtherL && "Top level loop is missing in computed loop info!");
+ // Now that we've matched this loop, erase its header from the map.
+ OtherLoopHeaders.erase(Header);
+ // And recursively compare these loops.
+ compareLoops(L, OtherL, OtherLoopHeaders);
+ }
+
+ // Any remaining entries in the map are loops which were found when computing
+ // a fresh LoopInfo but not present in the current one.
+ if (!OtherLoopHeaders.empty()) {
+ for (const auto &HeaderAndLoop : OtherLoopHeaders)
+ dbgs() << "Found new loop: " << *HeaderAndLoop.second << "\n";
+ llvm_unreachable("Found new loops when recomputing LoopInfo!");
}
#endif
}
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index b58f07e69475..c5514316f75f 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -32,12 +32,6 @@ class TargetLibraryInfo;
class Type;
class Value;
-enum class ObjSizeMode {
- Exact = 0,
- Min = 1,
- Max = 2
-};
-
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
@@ -129,17 +123,36 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
// Utility functions to compute size of objects.
//
+/// Various options to control the behavior of getObjectSize.
+struct ObjectSizeOpts {
+ /// Controls how we handle conditional statements with unknown conditions.
+ enum class Mode : uint8_t {
+ /// Fail to evaluate an unknown condition.
+ Exact,
+ /// Evaluate all branches of an unknown condition. If all evaluations
+ /// succeed, pick the minimum size.
+ Min,
+ /// Same as Min, except we pick the maximum size of all of the branches.
+ Max
+ };
+
+ /// How we want to evaluate this object's size.
+ Mode EvalMode = Mode::Exact;
+ /// Whether to round the result up to the alignment of allocas, byval
+ /// arguments, and global variables.
+ bool RoundToAlign = false;
+ /// If this is true, null pointers in address space 0 will be treated as
+ /// though they can't be evaluated. Otherwise, null is always considered to
+ /// point to a 0 byte region of memory.
+ bool NullIsUnknownSize = false;
+};
+
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise. In this context, by
/// object we mean the region of memory starting at Ptr to the end of the
/// underlying object pointed to by Ptr.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-/// If Mode is Min or Max the size will be evaluated even if it depends on
-/// a condition and corresponding value will be returned (min or max).
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
- const TargetLibraryInfo *TLI, bool RoundToAlign = false,
- ObjSizeMode Mode = ObjSizeMode::Exact);
+ const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
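+
+// A minimal usage sketch (illustrative; assumes `Ptr`, `DL`, and `TLI` are in
+// scope): evaluate the maximum possible size across unknown branches.
+//   ObjectSizeOpts Opts;
+//   Opts.EvalMode = ObjectSizeOpts::Mode::Max;
+//   uint64_t Size;
+//   if (getObjectSize(Ptr, Size, DL, TLI, Opts))
+//     errs() << "object size upper bound: " << Size << "\n";
+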
/// Try to turn a call to @llvm.objectsize into an integer value of the given
/// Type. Returns null on failure.
@@ -160,8 +173,7 @@ class ObjectSizeOffsetVisitor
const DataLayout &DL;
const TargetLibraryInfo *TLI;
- bool RoundToAlign;
- ObjSizeMode Mode;
+ ObjectSizeOpts Options;
unsigned IntTyBits;
APInt Zero;
SmallPtrSet<Instruction *, 8> SeenInsts;
@@ -174,8 +186,7 @@ class ObjectSizeOffsetVisitor
public:
ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI,
- LLVMContext &Context, bool RoundToAlign = false,
- ObjSizeMode Mode = ObjSizeMode::Exact);
+ LLVMContext &Context, ObjectSizeOpts Options = {});
SizeOffsetType compute(Value *V);
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
new file mode 100644
index 000000000000..db31ae9f4f10
--- /dev/null
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -0,0 +1,1155 @@
+//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file exposes an interface to building/using memory SSA to
+/// walk memory instructions using a use/def graph.
+///
+/// The MemorySSA class builds an SSA form that links together memory access
+/// instructions such as loads, stores, atomics, and calls. Additionally, it
+/// does a trivial form of "heap versioning": every time the memory state
+/// changes in the program, we generate a new heap version. It generates
+/// MemoryDef/Uses/Phis that are overlaid on top of the existing instructions.
+///
+/// As a trivial example,
+/// define i32 @main() #0 {
+/// entry:
+/// %call = call noalias i8* @_Znwm(i64 4) #2
+/// %0 = bitcast i8* %call to i32*
+/// %call1 = call noalias i8* @_Znwm(i64 4) #2
+/// %1 = bitcast i8* %call1 to i32*
+/// store i32 5, i32* %0, align 4
+/// store i32 7, i32* %1, align 4
+/// %2 = load i32* %0, align 4
+/// %3 = load i32* %1, align 4
+/// %add = add nsw i32 %2, %3
+/// ret i32 %add
+/// }
+///
+/// Will become
+/// define i32 @main() #0 {
+/// entry:
+/// ; 1 = MemoryDef(0)
+/// %call = call noalias i8* @_Znwm(i64 4) #3
+/// %2 = bitcast i8* %call to i32*
+/// ; 2 = MemoryDef(1)
+/// %call1 = call noalias i8* @_Znwm(i64 4) #3
+/// %4 = bitcast i8* %call1 to i32*
+/// ; 3 = MemoryDef(2)
+/// store i32 5, i32* %2, align 4
+/// ; 4 = MemoryDef(3)
+/// store i32 7, i32* %4, align 4
+/// ; MemoryUse(3)
+/// %7 = load i32* %2, align 4
+/// ; MemoryUse(4)
+/// %8 = load i32* %4, align 4
+/// %add = add nsw i32 %7, %8
+/// ret i32 %add
+/// }
+///
+/// Given this form, all the stores that could ever affect the load at %8 can
+/// be found by using the MemoryUse associated with it, and walking from use to
+/// def until you hit the top of the function.
+///
+/// Each def also has a list of users associated with it, so you can walk from
+/// both def to users, and users to defs. Note that we disambiguate MemoryUses,
+/// but not the RHS of MemoryDefs. You can see this above at %7, which would
+/// otherwise be a MemoryUse(4). Being disambiguated means that for a given
+/// store, all the MemoryUses on its use lists are may-aliases of that store
+/// (but the MemoryDefs on its use list may not be).
+///
+/// MemoryDefs are not disambiguated because it would require multiple reaching
+/// definitions, which would require multiple phis, and multiple memoryaccesses
+/// per instruction.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMORYSSA_H
+#define LLVM_ANALYSIS_MEMORYSSA_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <utility>
+
+namespace llvm {
+
+class Function;
+class Instruction;
+class MemoryAccess;
+class LLVMContext;
+class raw_ostream;
+namespace MSSAHelpers {
+struct AllAccessTag {};
+struct DefsOnlyTag {};
+}
+
+enum {
+ // Used to signify what the default invalid ID is for MemoryAccess's
+ // getID()
+ INVALID_MEMORYACCESS_ID = 0
+};
+
+template <class T> class memoryaccess_def_iterator_base;
+using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
+using const_memoryaccess_def_iterator =
+ memoryaccess_def_iterator_base<const MemoryAccess>;
+
+/// \brief The base for all memory accesses. All memory accesses in a block are
+/// linked together using an intrusive list.
+class MemoryAccess
+ : public User,
+ public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>,
+ public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> {
+public:
+ using AllAccessType =
+ ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
+ using DefsOnlyType =
+ ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
+
+ // Methods for support type inquiry through isa, cast, and
+ // dyn_cast
+ static inline bool classof(const Value *V) {
+ unsigned ID = V->getValueID();
+ return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal;
+ }
+
+ MemoryAccess(const MemoryAccess &) = delete;
+ MemoryAccess &operator=(const MemoryAccess &) = delete;
+ ~MemoryAccess() override;
+
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t) = delete;
+
+ BasicBlock *getBlock() const { return Block; }
+
+ virtual void print(raw_ostream &OS) const = 0;
+ virtual void dump() const;
+
+ /// \brief The user iterators for a memory access
+ typedef user_iterator iterator;
+ typedef const_user_iterator const_iterator;
+
+ /// \brief This iterator walks over all of the defs in a given
+ /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For
+ /// MemoryUse/MemoryDef, this walks the defining access.
+ memoryaccess_def_iterator defs_begin();
+ const_memoryaccess_def_iterator defs_begin() const;
+ memoryaccess_def_iterator defs_end();
+ const_memoryaccess_def_iterator defs_end() const;
+
+ /// \brief Get the iterators for the all access list and the defs only list
+ /// We default to the all access list.
+ AllAccessType::self_iterator getIterator() {
+ return this->AllAccessType::getIterator();
+ }
+ AllAccessType::const_self_iterator getIterator() const {
+ return this->AllAccessType::getIterator();
+ }
+ AllAccessType::reverse_self_iterator getReverseIterator() {
+ return this->AllAccessType::getReverseIterator();
+ }
+ AllAccessType::const_reverse_self_iterator getReverseIterator() const {
+ return this->AllAccessType::getReverseIterator();
+ }
+ DefsOnlyType::self_iterator getDefsIterator() {
+ return this->DefsOnlyType::getIterator();
+ }
+ DefsOnlyType::const_self_iterator getDefsIterator() const {
+ return this->DefsOnlyType::getIterator();
+ }
+ DefsOnlyType::reverse_self_iterator getReverseDefsIterator() {
+ return this->DefsOnlyType::getReverseIterator();
+ }
+ DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const {
+ return this->DefsOnlyType::getReverseIterator();
+ }
+
+protected:
+ friend class MemorySSA;
+ friend class MemoryUseOrDef;
+ friend class MemoryUse;
+ friend class MemoryDef;
+ friend class MemoryPhi;
+
+ /// \brief Used by MemorySSA to change the block of a MemoryAccess when it is
+ /// moved.
+ void setBlock(BasicBlock *BB) { Block = BB; }
+
+ /// \brief Used for debugging and tracking things about MemoryAccesses.
+ /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
+ virtual unsigned getID() const = 0;
+
+ MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB,
+ unsigned NumOperands)
+ : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {}
+
+private:
+ BasicBlock *Block;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) {
+ MA.print(OS);
+ return OS;
+}
+
+/// \brief Class that has the common methods + fields of memory uses/defs. It's
+/// a little awkward to have, but there are many cases where we want either a
+/// use or def, and there are many cases where uses are needed (defs aren't
+/// acceptable), and vice-versa.
+///
+/// This class should never be instantiated directly; make a MemoryUse or
+/// MemoryDef instead.
+class MemoryUseOrDef : public MemoryAccess {
+public:
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t) = delete;
+
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ /// \brief Get the instruction that this MemoryUse represents.
+ Instruction *getMemoryInst() const { return MemoryInst; }
+
+ /// \brief Get the access that produces the memory state used by this Use.
+ MemoryAccess *getDefiningAccess() const { return getOperand(0); }
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
+ }
+
+ // Sadly, these have to be public because they are needed in some of the
+ // iterators.
+ virtual bool isOptimized() const = 0;
+ virtual MemoryAccess *getOptimized() const = 0;
+ virtual void setOptimized(MemoryAccess *) = 0;
+
+ /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to
+ /// be rewalked by the walker if necessary.
+ /// This really should only be called by tests.
+ virtual void resetOptimized() = 0;
+
+protected:
+ friend class MemorySSA;
+ friend class MemorySSAUpdater;
+ MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
+ Instruction *MI, BasicBlock *BB)
+ : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) {
+ setDefiningAccess(DMA);
+ }
+ void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) {
+ if (!Optimized) {
+ setOperand(0, DMA);
+ return;
+ }
+ setOptimized(DMA);
+ }
+
+private:
+ Instruction *MemoryInst;
+};
+
+template <>
+struct OperandTraits<MemoryUseOrDef>
+ : public FixedNumOperandTraits<MemoryUseOrDef, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
+
+/// \brief Represents read-only accesses to memory
+///
+/// In particular, the set of Instructions that will be represented by
+/// MemoryUse's is exactly the set of Instructions for which
+/// AliasAnalysis::getModRefInfo returns "Ref".
+class MemoryUse final : public MemoryUseOrDef {
+public:
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
+ : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {}
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ void *operator new(size_t, unsigned) = delete;
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryUseVal;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ virtual void setOptimized(MemoryAccess *DMA) override {
+ OptimizedID = DMA->getID();
+ setOperand(0, DMA);
+ }
+
+ virtual bool isOptimized() const override {
+ return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
+ }
+
+ virtual MemoryAccess *getOptimized() const override {
+ return getDefiningAccess();
+ }
+ virtual void resetOptimized() override {
+ OptimizedID = INVALID_MEMORYACCESS_ID;
+ }
+
+protected:
+ friend class MemorySSA;
+
+ unsigned getID() const override {
+ llvm_unreachable("MemoryUses do not have IDs");
+ }
+
+private:
+ unsigned int OptimizedID;
+};
+
+template <>
+struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess)
+
+/// \brief Represents a read-write access to memory, whether it is a must-alias,
+/// or a may-alias.
+///
+/// In particular, the set of Instructions that will be represented by
+/// MemoryDef's is exactly the set of Instructions for which
+/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef".
+/// Note that, in order to provide def-def chains, all defs also have a use
+/// associated with them. This use points to the nearest reaching
+/// MemoryDef/MemoryPhi.
+class MemoryDef final : public MemoryUseOrDef {
+public:
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
+ unsigned Ver)
+ : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver),
+ Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {}
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ void *operator new(size_t, unsigned) = delete;
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryDefVal;
+ }
+
+ virtual void setOptimized(MemoryAccess *MA) override {
+ Optimized = MA;
+ OptimizedID = getDefiningAccess()->getID();
+ }
+ virtual MemoryAccess *getOptimized() const override { return Optimized; }
+ virtual bool isOptimized() const override {
+ return getOptimized() && getDefiningAccess() &&
+ OptimizedID == getDefiningAccess()->getID();
+ }
+ virtual void resetOptimized() override {
+ OptimizedID = INVALID_MEMORYACCESS_ID;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+protected:
+ friend class MemorySSA;
+
+ unsigned getID() const override { return ID; }
+
+private:
+ const unsigned ID;
+ MemoryAccess *Optimized;
+ unsigned int OptimizedID;
+};
+
+template <>
+struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)
+
+/// \brief Represents phi nodes for memory accesses.
+///
+/// These have the same semantics as regular phi nodes, with the exception that
+/// only one phi will ever exist in a given basic block.
+/// Guaranteeing one phi per block means guaranteeing there is only ever one
+/// valid reaching MemoryDef/MemoryPHI along each path to the phi node.
+/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or
+/// a MemoryPhi's operands.
+/// That is, given
+/// if (a) {
+/// store %a
+/// store %b
+/// }
+/// it *must* be transformed into
+/// if (a) {
+/// 1 = MemoryDef(liveOnEntry)
+/// store %a
+/// 2 = MemoryDef(1)
+/// store %b
+/// }
+/// and *not*
+/// if (a) {
+/// 1 = MemoryDef(liveOnEntry)
+/// store %a
+/// 2 = MemoryDef(liveOnEntry)
+/// store %b
+/// }
+/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the
+/// end of the branch, and if there are not two phi nodes, one will be
+/// disconnected completely from the SSA graph below that point.
+/// Because MemoryUse's do not generate new definitions, they do not have this
+/// issue.
+class MemoryPhi final : public MemoryAccess {
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) { return User::operator new(s); }
+
+public:
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0)
+ : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) {
+ allocHungoffUses(ReservedSpace);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
+ // Block iterator interface. This provides access to the list of incoming
+ // basic blocks, which parallels the list of incoming values.
+ typedef BasicBlock **block_iterator;
+ typedef BasicBlock *const *const_block_iterator;
+
+ block_iterator block_begin() {
+ auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace);
+ return reinterpret_cast<block_iterator>(Ref + 1);
+ }
+
+ const_block_iterator block_begin() const {
+ const auto *Ref =
+ reinterpret_cast<const Use::UserRef *>(op_begin() + ReservedSpace);
+ return reinterpret_cast<const_block_iterator>(Ref + 1);
+ }
+
+ block_iterator block_end() { return block_begin() + getNumOperands(); }
+
+ const_block_iterator block_end() const {
+ return block_begin() + getNumOperands();
+ }
+
+ iterator_range<block_iterator> blocks() {
+ return make_range(block_begin(), block_end());
+ }
+
+ iterator_range<const_block_iterator> blocks() const {
+ return make_range(block_begin(), block_end());
+ }
+
+ op_range incoming_values() { return operands(); }
+
+ const_op_range incoming_values() const { return operands(); }
+
+ /// \brief Return the number of incoming edges
+ unsigned getNumIncomingValues() const { return getNumOperands(); }
+
+ /// \brief Return incoming value number x
+ MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); }
+ void setIncomingValue(unsigned I, MemoryAccess *V) {
+ assert(V && "PHI node got a null value!");
+ setOperand(I, V);
+ }
+ static unsigned getOperandNumForIncomingValue(unsigned I) { return I; }
+ static unsigned getIncomingValueNumForOperand(unsigned I) { return I; }
+
+ /// \brief Return incoming basic block number @p I.
+ BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; }
+
+ /// \brief Return incoming basic block corresponding
+ /// to an operand of the PHI.
+ BasicBlock *getIncomingBlock(const Use &U) const {
+ assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
+ return getIncomingBlock(unsigned(&U - op_begin()));
+ }
+
+ /// \brief Return incoming basic block corresponding
+ /// to value use iterator.
+ BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const {
+ return getIncomingBlock(I.getUse());
+ }
+
+ void setIncomingBlock(unsigned I, BasicBlock *BB) {
+ assert(BB && "PHI node got a null basic block!");
+ block_begin()[I] = BB;
+ }
+
+ /// \brief Add an incoming value to the end of the PHI list
+ void addIncoming(MemoryAccess *V, BasicBlock *BB) {
+ if (getNumOperands() == ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ setNumHungOffUseOperands(getNumOperands() + 1);
+ setIncomingValue(getNumOperands() - 1, V);
+ setIncomingBlock(getNumOperands() - 1, BB);
+ }
+
+ /// \brief Return the first index of the specified basic
+ /// block in the value list for this PHI. Returns -1 if no instance.
+ int getBasicBlockIndex(const BasicBlock *BB) const {
+ for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
+ if (block_begin()[I] == BB)
+ return I;
+ return -1;
+ }
+
+ Value *getIncomingValueForBlock(const BasicBlock *BB) const {
+ int Idx = getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "Invalid basic block argument!");
+ return getIncomingValue(Idx);
+ }
+
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == MemoryPhiVal;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+protected:
+ friend class MemorySSA;
+
+ /// \brief This is more complicated than the generic
+ /// User::allocHungoffUses, because we have to allocate Uses for the incoming
+ /// values and pointers to the incoming blocks, all in one allocation.
+ void allocHungoffUses(unsigned N) {
+ User::allocHungoffUses(N, /* IsPhi */ true);
+ }
+
+ unsigned getID() const final { return ID; }
+
+private:
+ // For debugging only
+ const unsigned ID;
+ unsigned ReservedSpace;
+
+ /// \brief This grows the operand list in response to a push_back style of
+ /// operation, growing the number of operands by 1.5 times.
+ void growOperands() {
+ unsigned E = getNumOperands();
+ // 2 op PHI nodes are VERY common, so reserve at least enough for that.
+ ReservedSpace = std::max(E + E / 2, 2u);
+ growHungoffUses(ReservedSpace, /* IsPhi */ true);
+ }
+};
+
+template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)
+
+class MemorySSAWalker;
+
+/// \brief Encapsulates MemorySSA, including all data associated with memory
+/// accesses.
+class MemorySSA {
+public:
+ MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
+ ~MemorySSA();
+
+ MemorySSAWalker *getWalker();
+
+ /// \brief Given a memory Mod/Ref'ing instruction, get the MemorySSA
+ /// access associated with it. If passed a basic block, this gets the memory phi
+ /// node that exists for that block, if there is one. Otherwise, this will get
+ /// a MemoryUseOrDef.
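+ ///
+ /// A minimal sketch (assuming `MSSA` is this MemorySSA and `I` is a memory
+ /// instruction it knows about):
+ /// \code
+ ///   if (MemoryUseOrDef *MUD = MSSA.getMemoryAccess(I))
+ ///     dbgs() << *MUD->getDefiningAccess() << "\n";
+ /// \endcode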
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+ MemoryPhi *getMemoryAccess(const BasicBlock *BB) const;
+
+ void dump() const;
+ void print(raw_ostream &) const;
+
+ /// \brief Return true if \p MA represents the live on entry value
+ ///
+ /// Loads and stores from pointer arguments and other global values may be
+ /// defined by memory operations that do not occur in the current function, so
+ /// they may be live on entry to the function. MemorySSA represents such
+ /// memory state by the live on entry definition, which is guaranteed to occur
+ /// before any other memory access in the function.
+ inline bool isLiveOnEntryDef(const MemoryAccess *MA) const {
+ return MA == LiveOnEntryDef.get();
+ }
+
+ inline MemoryAccess *getLiveOnEntryDef() const {
+ return LiveOnEntryDef.get();
+ }
+
+ // Sadly, iplists, by default, own and delete pointers added to the
+ // list. It's not currently possible to have two iplists for the same type,
+ // where one owns the pointers, and one does not. This is because the traits
+ // are per-type, not per-tag. If this ever changes, we should make the
+ // DefList an iplist.
+ using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
+ using DefsList =
+ simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
+
+ /// \brief Return the list of MemoryAccess's for a given basic block.
+ ///
+ /// This list is not modifiable by the user.
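+ ///
+ /// A minimal iteration sketch (assuming `MSSA` and a block `BB` are in
+ /// scope):
+ /// \code
+ ///   if (const auto *Accesses = MSSA.getBlockAccesses(BB))
+ ///     for (const MemoryAccess &MA : *Accesses)
+ ///       dbgs() << MA << "\n";
+ /// \endcode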
+ const AccessList *getBlockAccesses(const BasicBlock *BB) const {
+ return getWritableBlockAccesses(BB);
+ }
+
+ /// \brief Return the list of MemoryDef's and MemoryPhi's for a given basic
+ /// block.
+ ///
+ /// This list is not modifiable by the user.
+ const DefsList *getBlockDefs(const BasicBlock *BB) const {
+ return getWritableBlockDefs(BB);
+ }
+
+ /// \brief Given two memory accesses in the same basic block, determine
+ /// whether MemoryAccess \p A dominates MemoryAccess \p B.
+ bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+ /// \brief Given two memory accesses in potentially different blocks,
+ /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
+ bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+ /// \brief Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
+ /// dominates Use \p B.
+ bool dominates(const MemoryAccess *A, const Use &B) const;
+
+ /// \brief Verify that MemorySSA is self-consistent (i.e., definitions dominate
+ /// all uses, uses appear in the right places). This is used by unit tests.
+ void verifyMemorySSA() const;
+
+ /// Used in various insertion functions to specify whether we are talking
+ /// about the beginning or end of a block.
+ enum InsertionPlace { Beginning, End };
+
+protected:
+ // Used by Memory SSA annotator, dumpers, and wrapper pass
+ friend class MemorySSAAnnotatedWriter;
+ friend class MemorySSAPrinterLegacyPass;
+ friend class MemorySSAUpdater;
+
+ void verifyDefUses(Function &F) const;
+ void verifyDomination(Function &F) const;
+ void verifyOrdering(Function &F) const;
+
+ // This is used by the use optimizer and updater.
+ AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
+ auto It = PerBlockAccesses.find(BB);
+ return It == PerBlockAccesses.end() ? nullptr : It->second.get();
+ }
+
+ // This is used by the use optimizer and updater.
+ DefsList *getWritableBlockDefs(const BasicBlock *BB) const {
+ auto It = PerBlockDefs.find(BB);
+ return It == PerBlockDefs.end() ? nullptr : It->second.get();
+ }
+
+ // These are used by the updater to perform various internal MemorySSA
+ // machinations. They do not always leave the IR in a correct state, and
+ // rely on the updater to fix up what they break, so they are not public.
+
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where);
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, InsertionPlace Point);
+ // Rename the dominator tree branch rooted at BB.
+ void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal,
+ SmallPtrSetImpl<BasicBlock *> &Visited) {
+ renamePass(DT->getNode(BB), IncomingVal, Visited, true, true);
+ }
+ void removeFromLookups(MemoryAccess *);
+ void removeFromLists(MemoryAccess *, bool ShouldDelete = true);
+ void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *,
+ InsertionPlace);
+ void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
+ AccessList::iterator);
+ MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+
+private:
+ class CachingWalker;
+ class OptimizeUses;
+
+ CachingWalker *getWalkerImpl();
+ void buildMemorySSA();
+ void optimizeUses();
+
+ void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
+ using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
+ using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>;
+
+ void
+ determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks);
+ void markUnreachableAsLiveOnEntry(BasicBlock *BB);
+ bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
+ MemoryPhi *createMemoryPhi(BasicBlock *BB);
+ MemoryUseOrDef *createNewAccess(Instruction *);
+ MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
+ void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &,
+ const DenseMap<const BasicBlock *, unsigned int> &);
+ MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
+ void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool);
+ void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ bool SkipVisited = false, bool RenameAllUses = false);
+ AccessList *getOrCreateAccessList(const BasicBlock *);
+ DefsList *getOrCreateDefsList(const BasicBlock *);
+ void renumberBlock(const BasicBlock *) const;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ Function &F;
+
+ // Memory SSA mappings
+ DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess;
+ // These two mappings contain the main block to access/def mappings for
+ // MemorySSA. The list contained in PerBlockAccesses really owns all the
+ // MemoryAccesses.
+ // Both maps maintain the invariant that if a block is found in them, the
+ // corresponding list is not empty, and if a block is not found in them, the
+ // corresponding list is empty.
+ AccessMap PerBlockAccesses;
+ DefsMap PerBlockDefs;
+ std::unique_ptr<MemoryAccess> LiveOnEntryDef;
+
+ // Domination mappings
+ // Note that the numbering is local to a block, even though the map is
+ // global.
+ mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid;
+ mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
+
+ // Memory SSA building info
+ std::unique_ptr<CachingWalker> Walker;
+ unsigned NextID;
+};
+
+// Internal MemorySSA utils, for use by MemorySSA classes and walkers
+class MemorySSAUtil {
+protected:
+ friend class MemorySSAWalker;
+ friend class GVNHoist;
+ // This function should not be used by new passes.
+ static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA);
+};
+
+// This pass does eager building and then printing of MemorySSA. It is used by
+// the tests to build, dump, and verify Memory SSA.
+class MemorySSAPrinterLegacyPass : public FunctionPass {
+public:
+ MemorySSAPrinterLegacyPass();
+
+ bool runOnFunction(Function &) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ static char ID;
+};
+
+/// An analysis that produces \c MemorySSA for a function.
+///
+class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
+ friend AnalysisInfoMixin<MemorySSAAnalysis>;
+
+ static AnalysisKey Key;
+
+public:
+ // Wrap MemorySSA result to ensure address stability of internal MemorySSA
+ // pointers after construction. Use a wrapper class instead of plain
+ // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
+ struct Result {
+ Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
+ MemorySSA &getMSSA() { return *MSSA.get(); }
+
+ std::unique_ptr<MemorySSA> MSSA;
+ };
+
+ Result run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Printer pass for \c MemorySSA.
+class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Verifier pass for \c MemorySSA.
+struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Legacy analysis pass which computes \c MemorySSA.
+class MemorySSAWrapperPass : public FunctionPass {
+public:
+ MemorySSAWrapperPass();
+
+ static char ID;
+
+ bool runOnFunction(Function &) override;
+ void releaseMemory() override;
+ MemorySSA &getMSSA() { return *MSSA; }
+ const MemorySSA &getMSSA() const { return *MSSA; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void verifyAnalysis() const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+private:
+ std::unique_ptr<MemorySSA> MSSA;
+};
+
+/// \brief This is the generic walker interface for walkers of MemorySSA.
+/// Walkers are used to further disambiguate the def-use chains
+/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives
+/// you.
+/// In particular, while the def-use chains provide basic information, and are
+/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a
+/// MemoryUse as AliasAnalysis considers it, a user may want better or other
+/// information. For example, they may want to use SCEV info to further
+/// disambiguate memory accesses, or they may want the nearest dominating
+/// may-aliasing MemoryDef for a call or a store. This API enables a
+/// standardized interface to getting and using that info.
+class MemorySSAWalker {
+public:
+ MemorySSAWalker(MemorySSA *);
+ virtual ~MemorySSAWalker() = default;
+
+ using MemoryAccessSet = SmallVector<MemoryAccess *, 8>;
+
+ /// \brief Given a memory Mod/Ref/ModRef'ing instruction, calling this
+ /// will give you the nearest dominating MemoryAccess that Mod's the location
+ /// the instruction accesses (by skipping any def which AA can prove does not
+ /// alias the location(s) accessed by the instruction given).
+ ///
+ /// Note that this will return a single access, and it must dominate the
+ /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction,
+ /// this will return the MemoryPhi, not the operand. This means that
+ /// given:
+ /// if (a) {
+ /// 1 = MemoryDef(liveOnEntry)
+ /// store %a
+ /// } else {
+ /// 2 = MemoryDef(liveOnEntry)
+ /// store %b
+ /// }
+ /// 3 = MemoryPhi(2, 1)
+ /// MemoryUse(3)
+ /// load %a
+ ///
+ /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
+ /// in the if (a) branch.
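+ ///
+ /// A minimal sketch (assuming `Walker` came from MemorySSA::getWalker() and
+ /// `LI` is a load instruction known to MemorySSA):
+ /// \code
+ ///   MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(LI);
+ /// \endcode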
+ MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
+ MemoryAccess *MA = MSSA->getMemoryAccess(I);
+ assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
+ return getClobberingMemoryAccess(MA);
+ }
+
+ /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
+ /// but takes a MemoryAccess instead of an Instruction.
+ virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
+
+ /// \brief Given a potentially clobbering memory access and a new location,
+ /// calling this will give you the nearest dominating clobbering MemoryAccess
+ /// (by skipping non-aliasing def links).
+ ///
+ /// This version of the function is mainly used to disambiguate phi-translated
+ /// pointers, where the value of a pointer may have changed from the initial
+ /// memory access. Note that this expects to be handed either a MemoryUse,
+ /// or an already potentially clobbering access. Unlike the above API, if
+ /// given a MemoryDef that clobbers the pointer as the starting access, it
+ /// will return that MemoryDef, whereas the above would return the clobber
+ /// starting from the use side of the memory def.
+ virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+ const MemoryLocation &) = 0;
+
+ /// \brief Given a memory access, invalidate anything this walker knows about
+ /// that access.
+ /// This API is used by walkers that store information to perform basic cache
+ /// invalidation. This will be called by MemorySSA at appropriate times for
+ /// the walker it uses or returns.
+ virtual void invalidateInfo(MemoryAccess *) {}
+
+ virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
+
+protected:
+ friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
+ // constructor.
+ MemorySSA *MSSA;
+};
+
+/// \brief A MemorySSAWalker that does no alias queries, or anything else. It
+/// simply returns the links as they were constructed by the builder.
+class DoNothingMemorySSAWalker final : public MemorySSAWalker {
+public:
+ // Keep the overrides below from hiding the Instruction overload of
+ // getClobberingMemoryAccess.
+ using MemorySSAWalker::getClobberingMemoryAccess;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+ const MemoryLocation &) override;
+};
+
+using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
+using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>;
+
+/// \brief Iterator base class used to implement const and non-const iterators
+/// over the defining accesses of a MemoryAccess.
+template <class T>
+class memoryaccess_def_iterator_base
+ : public iterator_facade_base<memoryaccess_def_iterator_base<T>,
+ std::forward_iterator_tag, T, ptrdiff_t, T *,
+ T *> {
+ using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base;
+
+public:
+ memoryaccess_def_iterator_base(T *Start) : Access(Start) {}
+ memoryaccess_def_iterator_base() = default;
+
+ bool operator==(const memoryaccess_def_iterator_base &Other) const {
+ return Access == Other.Access && (!Access || ArgNo == Other.ArgNo);
+ }
+
+ // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the
+ // block from the operand in constant time (in a PHINode, the use list has
+ // both, so it's just subtraction). We provide it as part of the
+ // iterator to avoid callers having to do a linear walk to get the block.
+ // If the operation becomes constant time on MemoryPHI's, this bit of
+ // abstraction breaking should be removed.
+ BasicBlock *getPhiArgBlock() const {
+ MemoryPhi *MP = dyn_cast<MemoryPhi>(Access);
+ assert(MP && "Tried to get phi arg block when not iterating over a PHI");
+ return MP->getIncomingBlock(ArgNo);
+ }
+ typename BaseT::iterator::pointer operator*() const {
+ assert(Access && "Tried to access past the end of our iterator");
+ // Go to the first argument for phis, and the defining access for everything
+ // else.
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
+ return MP->getIncomingValue(ArgNo);
+ return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
+ }
+ using BaseT::operator++;
+ memoryaccess_def_iterator &operator++() {
+ assert(Access && "Hit end of iterator");
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
+ if (++ArgNo >= MP->getNumIncomingValues()) {
+ ArgNo = 0;
+ Access = nullptr;
+ }
+ } else {
+ Access = nullptr;
+ }
+ return *this;
+ }
+
+private:
+ T *Access = nullptr;
+ unsigned ArgNo = 0;
+};
+
+inline memoryaccess_def_iterator MemoryAccess::defs_begin() {
+ return memoryaccess_def_iterator(this);
+}
+
+inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const {
+ return const_memoryaccess_def_iterator(this);
+}
+
+inline memoryaccess_def_iterator MemoryAccess::defs_end() {
+ return memoryaccess_def_iterator();
+}
+
+inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const {
+ return const_memoryaccess_def_iterator();
+}
+
+/// \brief GraphTraits for a MemoryAccess, which walks defs in the normal case,
+/// and uses in the inverse case.
+template <> struct GraphTraits<MemoryAccess *> {
+ using NodeRef = MemoryAccess *;
+ using ChildIteratorType = memoryaccess_def_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); }
+};
+
+template <> struct GraphTraits<Inverse<MemoryAccess *>> {
+ using NodeRef = MemoryAccess *;
+ using ChildIteratorType = MemoryAccess::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->user_end(); }
+};
+
+/// \brief Provide an iterator that walks defs, giving both the memory access,
+/// and the current pointer location, updating the pointer location as it
+/// changes due to phi node translation.
+///
+/// This iterator, while somewhat specialized, is what most clients actually
+/// want when walking upwards through MemorySSA def chains. It takes a pair of
+/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the
+/// memory location through phi nodes for the user.
+class upward_defs_iterator
+ : public iterator_facade_base<upward_defs_iterator,
+ std::forward_iterator_tag,
+ const MemoryAccessPair> {
+ using BaseT = upward_defs_iterator::iterator_facade_base;
+
+public:
+ upward_defs_iterator(const MemoryAccessPair &Info)
+ : DefIterator(Info.first), Location(Info.second),
+ OriginalAccess(Info.first) {
+ CurrentPair.first = nullptr;
+
+ WalkingPhi = Info.first && isa<MemoryPhi>(Info.first);
+ fillInCurrentPair();
+ }
+
+ upward_defs_iterator() { CurrentPair.first = nullptr; }
+
+ bool operator==(const upward_defs_iterator &Other) const {
+ return DefIterator == Other.DefIterator;
+ }
+
+ BaseT::iterator::reference operator*() const {
+ assert(DefIterator != OriginalAccess->defs_end() &&
+ "Tried to access past the end of our iterator");
+ return CurrentPair;
+ }
+
+ using BaseT::operator++;
+ upward_defs_iterator &operator++() {
+ assert(DefIterator != OriginalAccess->defs_end() &&
+ "Tried to access past the end of the iterator");
+ ++DefIterator;
+ if (DefIterator != OriginalAccess->defs_end())
+ fillInCurrentPair();
+ return *this;
+ }
+
+ BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
+
+private:
+ void fillInCurrentPair() {
+ CurrentPair.first = *DefIterator;
+ if (WalkingPhi && Location.Ptr) {
+ PHITransAddr Translator(
+ const_cast<Value *>(Location.Ptr),
+ OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
+ if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
+ DefIterator.getPhiArgBlock(), nullptr,
+ false))
+ if (Translator.getAddr() != Location.Ptr) {
+ CurrentPair.second = Location.getWithNewPtr(Translator.getAddr());
+ return;
+ }
+ }
+ CurrentPair.second = Location;
+ }
+
+ MemoryAccessPair CurrentPair;
+ memoryaccess_def_iterator DefIterator;
+ MemoryLocation Location;
+ MemoryAccess *OriginalAccess = nullptr;
+ bool WalkingPhi = false;
+};
+
+inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) {
+ return upward_defs_iterator(Pair);
+}
+
+inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
+
+inline iterator_range<upward_defs_iterator>
+upward_defs(const MemoryAccessPair &Pair) {
+ return make_range(upward_defs_begin(Pair), upward_defs_end());
+}
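+
+// A minimal sketch (assuming `MSSA` and a load's MemoryUse `MU`): walk upward
+// from its defining access, phi-translating the location as we go.
+//   MemoryLocation Loc = MemoryLocation::get(cast<LoadInst>(MU->getMemoryInst()));
+//   for (const MemoryAccessPair &P : upward_defs({MU->getDefiningAccess(), Loc}))
+//     dbgs() << *P.first << "\n";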
+
+/// Walks the defining accesses of MemoryDefs. Stops after we hit something that
+/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when
+/// comparing against a null def_chain_iterator, this will compare equal only
+/// after walking said Phi/liveOnEntry.
+///
+/// The UseOptimizedChain flag specifies whether to walk the clobbering
+/// access chain, or all the accesses.
+///
+/// Normally, MemoryDefs are all just def/use linked together, so a def_chain on
+/// a MemoryDef will walk all MemoryDefs above it in the program until it hits
+/// a phi node. The optimized chain walks the clobbering access of a store.
+/// So if you are just trying to find, given a store, what the next
+/// thing that would clobber the same memory is, you want the optimized chain.
+template <class T, bool UseOptimizedChain = false>
+struct def_chain_iterator
+ : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>,
+ std::forward_iterator_tag, MemoryAccess *> {
+ def_chain_iterator() : MA(nullptr) {}
+ def_chain_iterator(T MA) : MA(MA) {}
+
+ T operator*() const { return MA; }
+
+ def_chain_iterator &operator++() {
+ // N.B. liveOnEntry has a null defining access.
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (UseOptimizedChain && MUD->isOptimized())
+ MA = MUD->getOptimized();
+ else
+ MA = MUD->getDefiningAccess();
+ } else {
+ MA = nullptr;
+ }
+
+ return *this;
+ }
+
+ bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
+
+private:
+ T MA;
+};
+
+template <class T>
+inline iterator_range<def_chain_iterator<T>>
+def_chain(T MA, MemoryAccess *UpTo = nullptr) {
+#ifdef EXPENSIVE_CHECKS
+ assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) &&
+ "UpTo isn't in the def chain!");
+#endif
+ return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo));
+}
+
+template <class T>
+inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) {
+ return make_range(def_chain_iterator<T, true>(MA),
+ def_chain_iterator<T, true>(nullptr));
+}
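+
+// A minimal sketch of walking a def chain (assuming `MA` is the MemoryAccess*
+// of a store's MemoryDef):
+//   for (MemoryAccess *A : def_chain(MA))
+//     dbgs() << *A << "\n"; // ends after the first MemoryPhi or liveOnEntry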
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_MEMORYSSA_H
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
new file mode 100644
index 000000000000..d30eeeaa95b6
--- /dev/null
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -0,0 +1,153 @@
+//===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief An automatic updater for MemorySSA that handles arbitrary insertion,
+// deletion, and moves. It performs phi insertion where necessary, and
+// automatically updates the MemorySSA IR to be correct.
+// While updating loads or removing instructions is often easy enough to not
+// need this, updating stores should generally not be attempted outside this
+// API.
+//
+// Basic API usage:
+// Create the memory access you want for the instruction (this is mainly so
+// we know where it is, without having to duplicate the entire set of create
+// functions MemorySSA supports).
+// Call insertDef or insertUse depending on whether it's a MemoryUse or a
+// MemoryDef.
+// That's it.
+//
+// For moving, first, move the instruction itself using the normal SSA
+// instruction moving API, then just call moveBefore, moveAfter, or moveTo with
+// the right arguments.
+//
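+// A minimal move sketch (illustrative; assumes `MSSA` is a MemorySSA& and the
+// store instruction itself has already been moved before `OtherStore`):
+//   MemorySSAUpdater Updater(&MSSA);
+//   Updater.moveBefore(MSSA.getMemoryAccess(Store),
+//                      MSSA.getMemoryAccess(OtherStore));
+//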
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
+#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/MemorySSA.h"
+
+namespace llvm {
+
+class Function;
+class Instruction;
+class MemoryAccess;
+class LLVMContext;
+class raw_ostream;
+
+class MemorySSAUpdater {
+private:
+ MemorySSA *MSSA;
+ SmallVector<MemoryPhi *, 8> InsertedPHIs;
+ SmallPtrSet<BasicBlock *, 8> VisitedBlocks;
+
+public:
+ MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+ /// Insert a definition into the MemorySSA IR. RenameUses will rename any use
+ /// below the new def block (and any inserted phis). RenameUses should be set
+ /// to true if the definition may cause new aliases for loads below it. This
+ /// is not the case for hoisting or sinking or other forms of code *movement*.
+ /// It *is* the case for straight code insertion.
+ /// For example:
+ /// store a
+ /// if (foo) { }
+ /// load a
+ ///
+ /// Moving the store into the if block, and calling insertDef, does not
+ /// require RenameUses.
+ /// However, changing it to:
+ /// store a
+ /// if (foo) { store b }
+ /// load a
+ /// Where a mayalias b, *does* require RenameUses be set to true.
+ void insertDef(MemoryDef *Def, bool RenameUses = false);
+ void insertUse(MemoryUse *Use);
+ void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
+ void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
+ void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
+ MemorySSA::InsertionPlace Where);
+
+ // The below are utility functions. Other than creation of accesses to pass
+ // to insertDef, and removeMemoryAccess to remove accesses, you should
+ // generally not attempt to update MemorySSA yourself. It is very non-trivial to get
+ // the edge cases right, and the above calls already operate in near-optimal
+ // time bounds.
+
+ /// \brief Create a MemoryAccess in MemorySSA at a specified point in a block,
+ /// with a specified clobbering definition.
+ ///
+ /// Returns the new MemoryAccess.
+ /// This should be called when a memory instruction is created that is being
+ /// used to replace an existing memory instruction. It will *not* create PHI
+ /// nodes, or verify the clobbering definition. The insertion place is used
+ /// solely to determine where in the MemorySSA access lists the instruction
+ /// will be placed. The caller is expected to keep the access ordering
+ /// consistent with the instruction ordering.
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
+ MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition,
+ const BasicBlock *BB,
+ MemorySSA::InsertionPlace Point);
+
+ /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+ /// MemoryAccess.
+ ///
+ /// Returns the new MemoryAccess.
+ /// This should be called when a memory instruction is created that is being
+ /// used to replace an existing memory instruction. It will *not* create PHI
+ /// nodes, or verify the clobbering definition.
+ ///
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
+ MemoryUseOrDef *createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryUseOrDef *InsertPt);
+ MemoryUseOrDef *createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt);
+
+ /// \brief Remove a MemoryAccess from MemorySSA, including updating all
+ /// definitions and uses.
+ /// This should be called when a memory instruction that has a MemoryAccess
+ /// associated with it is erased from the program. For example, if a store or
+ /// load is simply erased (not replaced), removeMemoryAccess should be called
+ /// on the MemoryAccess for that store/load.
+ void removeMemoryAccess(MemoryAccess *);
+
+private:
+ // Move What before Where in the MemorySSA IR.
+ template <class WhereType>
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where);
+ MemoryAccess *getPreviousDef(MemoryAccess *);
+ MemoryAccess *getPreviousDefInBlock(MemoryAccess *);
+ MemoryAccess *getPreviousDefFromEnd(BasicBlock *);
+ MemoryAccess *getPreviousDefRecursive(BasicBlock *);
+ MemoryAccess *recursePhi(MemoryAccess *Phi);
+ template <class RangeType>
+ MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
+ void fixupDefs(const SmallVectorImpl<MemoryAccess *> &);
+};
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_MEMORYSSAUPDATER_H
diff --git a/include/llvm/Analysis/ObjectUtils.h b/include/llvm/Analysis/ObjectUtils.h
new file mode 100644
index 000000000000..2ad3b1717009
--- /dev/null
+++ b/include/llvm/Analysis/ObjectUtils.h
@@ -0,0 +1,42 @@
+//===- Analysis/ObjectUtils.h - analysis utils for object files -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_OBJECT_UTILS_H
+#define LLVM_ANALYSIS_OBJECT_UTILS_H
+
+#include "llvm/IR/GlobalVariable.h"
+
+namespace llvm {
+
+/// True if GV can be left out of the object symbol table. This is the case
+/// for linkonce_odr values whose address is not significant. While legal, it is
+/// not normally profitable to omit them from the .o symbol table. Using this
+/// analysis makes sense when the information can be passed down to the linker
+/// or we are in LTO.
+inline bool canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+ if (!GV->hasLinkOnceODRLinkage())
+ return false;
+
+ // We assume that anyone who sets global unnamed_addr on a non-constant knows
+ // what they're doing.
+ if (GV->hasGlobalUnnamedAddr())
+ return true;
+
+ // If it is a non-constant variable, it needs to be uniqued across shared
+ // objects.
+ if (auto *Var = dyn_cast<GlobalVariable>(GV))
+ if (!Var->isConstant())
+ return false;
+
+ return GV->hasAtLeastLocalUnnamedAddr();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_OBJECT_UTILS_H
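As a hypothetical usage sketch (countOmittableSymbols is not part of the patch), a pass with linker feedback or running under LTO might query this per global:

#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Count the globals the linker could drop from the object symbol table.
unsigned countOmittableSymbols(const Module &M) {
  unsigned N = 0;
  for (const GlobalVariable &GV : M.globals())
    if (canBeOmittedFromSymbolTable(&GV))
      ++N;
  return N;
}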
diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
index 39269269c244..edd9140a3493 100644
--- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -38,7 +38,7 @@ class Value;
/// enabled in the LLVM context.
class OptimizationRemarkEmitter {
public:
- OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
+ OptimizationRemarkEmitter(const Function *F, BlockFrequencyInfo *BFI)
: F(F), BFI(BFI) {}
/// \brief This variant can be used to generate ORE on demand (without the
@@ -52,7 +52,7 @@ public:
/// operation since BFI and all its required analyses are computed. This is
/// for example useful for CGSCC passes that can't use function analyses
/// passes in the old PM.
- OptimizationRemarkEmitter(Function *F);
+ OptimizationRemarkEmitter(const Function *F);
OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
: F(Arg.F), BFI(Arg.BFI) {}
@@ -63,136 +63,16 @@ public:
return *this;
}
- /// The new interface to emit remarks.
- void emit(DiagnosticInfoOptimizationBase &OptDiag);
-
- /// Emit an optimization-applied message.
- ///
- /// \p PassName is the name of the pass emitting the message. If -Rpass= is
- /// given and \p PassName matches the regular expression in -Rpass, then the
- /// remark will be emitted. \p Fn is the function triggering the remark, \p
- /// DLoc is the debug location where the diagnostic is generated. \p V is the
- /// IR Value that identifies the code region. \p Msg is the message string to
- /// use.
- void emitOptimizationRemark(const char *PassName, const DebugLoc &DLoc,
- const Value *V, const Twine &Msg);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg);
-
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemark(const char *PassName, Instruction *Inst,
- const Twine &Msg) {
- emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(),
- Msg);
- }
-
- /// Emit an optimization-missed message.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-missed= is given and the name matches the regular expression in
- /// -Rpass, then the remark will be emitted. \p DLoc is the debug location
- /// where the diagnostic is generated. \p V is the IR Value that identifies
- /// the code region. \p Msg is the message string to use. If \p IsVerbose is
- /// true, the message is considered verbose and will only be emitted when
- /// verbose output is turned on.
- void emitOptimizationRemarkMissed(const char *PassName, const DebugLoc &DLoc,
- const Value *V, const Twine &Msg,
- bool IsVerbose = false);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkMissed(const char *PassName, Loop *L,
- const Twine &Msg, bool IsVerbose = false);
-
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst,
- const Twine &Msg, bool IsVerbose = false) {
- emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(),
- Inst->getParent(), Msg, IsVerbose);
- }
-
- /// Emit an optimization analysis remark message.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p DLoc is the debug location
- /// where the diagnostic is generated. \p V is the IR Value that identifies
- /// the code region. \p Msg is the message string to use. If \p IsVerbose is
- /// true, the message is considered verbose and will only be emitted when
- /// verbose output is turned on.
- void emitOptimizationRemarkAnalysis(const char *PassName,
- const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose = false);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L,
- const Twine &Msg, bool IsVerbose = false);
+ /// Handle invalidation events in the new pass manager.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst,
- const Twine &Msg,
- bool IsVerbose = false) {
- emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(),
- Inst->getParent(), Msg, IsVerbose);
- }
-
- /// \brief This variant allows specifying what should be emitted for missed
- /// and analysis remarks in one call.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-missed= is given and \p PassName matches the regular expression, \p
- /// MsgForMissedRemark is emitted.
- ///
- /// If -Rpass-analysis= is given and \p PassName matches the regular
- /// expression, \p MsgForAnalysisRemark is emitted.
- ///
- /// The debug location and the code region is derived from \p Inst. If \p
- /// IsVerbose is true, the message is considered verbose and will only be
- /// emitted when verbose output is turned on.
- void emitOptimizationRemarkMissedAndAnalysis(
- const char *PassName, Instruction *Inst, const Twine &MsgForMissedRemark,
- const Twine &MsgForAnalysisRemark, bool IsVerbose = false) {
- emitOptimizationRemarkAnalysis(PassName, Inst, MsgForAnalysisRemark,
- IsVerbose);
- emitOptimizationRemarkMissed(PassName, Inst, MsgForMissedRemark, IsVerbose);
- }
-
- /// \brief Emit an optimization analysis remark related to floating-point
- /// non-commutativity.
+ /// \brief Output the remark via the diagnostic handler and to the
+ /// optimization record file.
///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p Fn is the function
- /// triggering the remark, \p DLoc is the debug location where the diagnostic
- /// is generated.\p V is the IR Value that identifies the code region. \p Msg
- /// is the message string to use.
- void emitOptimizationRemarkAnalysisFPCommute(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V,
- const Twine &Msg);
-
- /// \brief Emit an optimization analysis remark related to pointer aliasing.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p Fn is the function
- /// triggering the remark, \p DLoc is the debug location where the diagnostic
- /// is generated.\p V is the IR Value that identifies the code region. \p Msg
- /// is the message string to use.
- void emitOptimizationRemarkAnalysisAliasing(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V, const Twine &Msg);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkAnalysisAliasing(const char *PassName, Loop *L,
- const Twine &Msg);
+ /// This is the new interface that should now be used rather than the legacy
+ /// emit* APIs.
+ void emit(DiagnosticInfoOptimizationBase &OptDiag);
/// \brief Whether we allow for extra compile-time budget to perform more
/// analysis to produce fewer false positives.
@@ -208,7 +88,7 @@ public:
}
private:
- Function *F;
+ const Function *F;
BlockFrequencyInfo *BFI;
@@ -220,7 +100,7 @@ private:
Optional<uint64_t> computeHotness(const Value *V);
/// Similar but use value from \p OptDiag and update hotness there.
- void computeHotness(DiagnosticInfoOptimizationBase &OptDiag);
+ void computeHotness(DiagnosticInfoIROptimization &OptDiag);
/// \brief Only allow verbose messages if we know we're filtering by hotness
/// (BFI is only set in this case).
@@ -274,5 +154,11 @@ public:
/// \brief Run the analysis pass over a function and produce an
/// OptimizationRemarkEmitter.
Result run(Function &F, FunctionAnalysisManager &AM);
};
+
+namespace yaml {
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
+};
+}
}
#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
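For reference, a hedged sketch of the streaming style that replaces the removed emit* helpers; the pass name, remark name, and surrounding plumbing here are hypothetical:

#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
using namespace llvm;

void noteUnrolled(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                  int Count) {
  // One entry point for every remark kind; -Rpass filtering and hotness
  // computation happen inside emit().
  ORE.emit(OptimizationRemark("my-pass", "Unrolled", Inst)
           << "unrolled loop " << ore::NV("UnrollCount", Count) << " times");
}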
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 34361dac8c16..94ee3b03bb86 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -26,6 +26,10 @@ struct PostDominatorTree : public DominatorTreeBase<BasicBlock> {
typedef DominatorTreeBase<BasicBlock> Base;
PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {}
+
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
};
/// \brief Analysis pass which computes a \c PostDominatorTree.
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index d7fe76e278e3..1aec35c3e677 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -29,6 +29,7 @@
namespace llvm {
class BasicBlock;
class BlockFrequencyInfo;
+class CallSite;
class ProfileSummary;
/// \brief Analysis providing profile information.
///
@@ -44,7 +45,7 @@ class ProfileSummaryInfo {
private:
Module &M;
std::unique_ptr<ProfileSummary> Summary;
- void computeSummary();
+ bool computeSummary();
void computeThresholds();
// Count thresholds to answer isHotCount and isColdCount queries.
Optional<uint64_t> HotCountThreshold, ColdCountThreshold;
@@ -53,16 +54,29 @@ public:
ProfileSummaryInfo(Module &M) : M(M) {}
ProfileSummaryInfo(ProfileSummaryInfo &&Arg)
: M(Arg.M), Summary(std::move(Arg.Summary)) {}
+ /// Returns the profile count for \p CallInst.
+ static Optional<uint64_t> getProfileCount(const Instruction *CallInst,
+ BlockFrequencyInfo *BFI);
/// \brief Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
+ /// Returns true if \p F has hot function entry or hot call edge.
+ bool isFunctionHotInCallGraph(const Function *F);
/// \brief Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F);
+ /// Returns true if \p F has cold function entry or cold call edge.
+ bool isFunctionColdInCallGraph(const Function *F);
/// \brief Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
/// \brief Returns true if BasicBlock \p B is considered hot.
bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if BasicBlock \p B is considered cold.
+ bool isColdBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if CallSite \p CS is considered hot.
+ bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if CallSite \p CS is considered cold.
+ bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
};
/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
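A hypothetical sketch of the new call-graph-aware queries (the helper and its analysis plumbing are not part of the patch):

#include "llvm/Analysis/ProfileSummaryInfo.h"
using namespace llvm;

// Prefer size over speed on provably cold code.
bool preferSizeOverSpeed(ProfileSummaryInfo &PSI, BlockFrequencyInfo *BFI,
                         const Function &F, const BasicBlock *BB) {
  if (PSI.isFunctionColdInCallGraph(&F))
    return true;
  return BB && PSI.isColdBB(BB, BFI);
}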
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
index 6e61fc3be384..2fe7c6725266 100644
--- a/include/llvm/Analysis/PtrUseVisitor.h
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -196,7 +196,10 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>,
typedef InstVisitor<DerivedT> Base;
public:
- PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {}
+ PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {
+ static_assert(std::is_base_of<PtrUseVisitor, DerivedT>::value,
+ "Must pass the derived type to this template!");
+ }
/// \brief Recursively visit the uses of the given pointer.
/// \returns An info struct about the pointer. See \c PtrInfo for details.
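The static_assert enforces the CRTP contract; a conforming visitor has this shape (a hypothetical sketch, assuming the protected PI and U members of PtrUseVisitorBase):

#include "llvm/Analysis/PtrUseVisitor.h"
using namespace llvm;

// The derived class must name itself as the template argument; naming any
// other type now fails at compile time instead of misbehaving at run time.
struct EscapeChecker : PtrUseVisitor<EscapeChecker> {
  EscapeChecker(const DataLayout &DL) : PtrUseVisitor<EscapeChecker>(DL) {}
  void visitStoreInst(StoreInst &SI) {
    // Storing the pointer itself (rather than through it) escapes it.
    if (SI.getValueOperand() == U->get())
      PI.setEscaped(&SI);
  }
};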
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index f2f27a137a88..caeb21db613e 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -886,6 +886,10 @@ public:
return *this;
}
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
// updateStatistics - Update statistic about created regions.
void updateStatistics(Region *R) final;
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1a93f9aa5fd2..9a50de540f2b 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -543,6 +543,12 @@ private:
/// predicate by splitting it into a set of independent predicates.
bool ProvingSplitPredicate;
+ /// Memoized values for the GetMinTrailingZeros method.
+ DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
+
+ /// Private helper for the GetMinTrailingZeros method.
+ uint32_t GetMinTrailingZerosImpl(const SCEV *S);
+
/// Information about the number of loop iterations for which a loop exit's
/// branch condition evaluates to the not-taken path. This is a temporary
/// pair of exact and max expressions that are eventually summarized in
@@ -600,14 +606,14 @@ private:
/// Information about the number of times a particular loop exit may be
/// reached before exiting the loop.
struct ExitNotTakenInfo {
- AssertingVH<BasicBlock> ExitingBlock;
+ PoisoningVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
std::unique_ptr<SCEVUnionPredicate> Predicate;
bool hasAlwaysTruePredicate() const {
return !Predicate || Predicate->isAlwaysTrue();
}
- explicit ExitNotTakenInfo(AssertingVH<BasicBlock> ExitingBlock,
+ explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock,
const SCEV *ExactNotTaken,
std::unique_ptr<SCEVUnionPredicate> Predicate)
: ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
@@ -972,6 +978,20 @@ private:
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true. Here LHS is an operation that includes FoundLHS as one of its
+ /// arguments.
+ bool isImpliedViaOperations(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS,
+ unsigned Depth = 0);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true.
+ /// Uses only simple, non-recursive checks, such as range analysis.
+ bool isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
/// true.
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, const SCEV *FoundLHS,
@@ -1065,18 +1085,6 @@ private:
bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred, bool &Increasing);
- /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
- /// is monotonically increasing or decreasing. In the former case set
- /// `Increasing` to true and in the latter case set `Increasing` to false.
- ///
- /// A predicate is said to be monotonically increasing if may go from being
- /// false to being true as the loop iterates, but never the other way
- /// around. A predicate is said to be monotonically decreasing if may go
- /// from being true to being false as the loop iterates, but never the other
- /// way around.
- bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
- bool &Increasing);
-
/// Return SCEV no-wrap flags that can be proven based on reasoning about
/// how poison produced from no-wrap flags on this value (e.g. a nuw add)
/// would trigger undefined behavior on overflow.
@@ -1129,6 +1137,9 @@ public:
/// return true. For pointer types, this is the pointer-sized integer type.
Type *getEffectiveSCEVType(Type *Ty) const;
+ /// Returns the wider of the types \p Ty1 and \p Ty2.
+ Type *getWiderType(Type *Ty1, Type *Ty2) const;
+
/// Return true if the SCEV is a scAddRecExpr or it contains
/// scAddRecExpr. The result will be cached in HasRecMap.
///
@@ -1152,7 +1163,8 @@ public:
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+ unsigned Depth = 0);
const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
@@ -1301,7 +1313,7 @@ public:
///
/// Implemented in terms of the \c getSmallConstantTripCount overload with
/// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripCount(Loop *L);
+ unsigned getSmallConstantTripCount(const Loop *L);
/// Returns the maximum trip count of this loop as a normal unsigned
/// value. Returns 0 if the trip count is unknown or not constant. This
@@ -1310,12 +1322,12 @@ public:
/// before taking the branch. For loops with multiple exits, it may not be
/// the number times that the loop header executes if the loop exits
/// prematurely via another branch.
- unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
+ unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock);
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
/// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(Loop *L);
+ unsigned getSmallConstantMaxTripCount(const Loop *L);
/// Returns the largest constant divisor of the trip count of the
/// loop if it is a single-exit loop and we can compute a small maximum for
@@ -1323,7 +1335,7 @@ public:
///
/// Implemented in terms of the \c getSmallConstantTripMultiple overload with
/// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripMultiple(Loop *L);
+ unsigned getSmallConstantTripMultiple(const Loop *L);
/// Returns the largest constant divisor of the trip count of this loop as a
/// normal unsigned value, if possible. This means that the actual trip
@@ -1331,12 +1343,13 @@ public:
/// count could very well be zero as well!). As explained in the comments
/// for getSmallConstantTripCount, this assumes that control exits the loop
/// via ExitingBlock.
- unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
+ unsigned getSmallConstantTripMultiple(const Loop *L,
+ BasicBlock *ExitingBlock);
/// Get the expression for the number of loop iterations for which this loop
/// is guaranteed not to exit via ExitingBlock. Otherwise return
/// SCEVCouldNotCompute.
- const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock);
+ const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
/// If the specified loop has a predictable backedge-taken count, return it,
/// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
@@ -1432,6 +1445,18 @@ public:
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS);
+ /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
+ /// is monotonically increasing or decreasing. In the former case set
+ /// `Increasing` to true and in the latter case set `Increasing` to false.
+ ///
+ /// A predicate is said to be monotonically increasing if it may go from
+ /// being false to being true as the loop iterates, but never the other way
+ /// around. A predicate is said to be monotonically decreasing if it may go
+ /// from being true to being false as the loop iterates, but never the other
+ /// way around.
+ bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
+ bool &Increasing);
+
/// Return true if the result of the predicate LHS `Pred` RHS is loop
/// invariant with respect to L. Set InvariantPred, InvariantLHS and
/// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the
@@ -1613,6 +1638,10 @@ private:
bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
bool NoWrap);
+ /// Get an add expression that has already been created, or create a new one.
+ const SCEV *getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags);
+
private:
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
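With the constified signatures above, the trip-count queries compose naturally; a hypothetical sketch:

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Best known constant bound on a loop's trip count, or 0 if none is known.
unsigned constantTripBound(const Loop *L, ScalarEvolution &SE) {
  if (unsigned Exact = SE.getSmallConstantTripCount(L))
    return Exact; // exact count via the single exiting block
  return SE.getSmallConstantMaxTripCount(L); // constant upper bound, or 0
}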
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index fdcd8be00dde..2c693bceb24d 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -595,58 +595,82 @@ namespace llvm {
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getTruncateExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getTruncateExpr(Operand, Expr->getType());
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getZeroExtendExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getZeroExtendExpr(Operand, Expr->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getSignExtendExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getSignExtendExpr(Operand, Expr->getType());
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getAddExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getAddExpr(Operands);
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getMulExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getMulExpr(Operands);
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()),
- ((SC*)this)->visit(Expr->getRHS()));
+ auto *LHS = ((SC *)this)->visit(Expr->getLHS());
+ auto *RHS = ((SC *)this)->visit(Expr->getRHS());
+ bool Changed = LHS != Expr->getLHS() || RHS != Expr->getRHS();
+ return !Changed ? Expr : SE.getUDivExpr(LHS, RHS);
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getAddRecExpr(Operands, Expr->getLoop(),
- Expr->getNoWrapFlags());
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr
+ : SE.getAddRecExpr(Operands, Expr->getLoop(),
+ Expr->getNoWrapFlags());
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getSMaxExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getSMaxExpr(Operands);
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getUMaxExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getUMaxExpr(Operands);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
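The point of the Changed tracking above, as a hypothetical sketch: a rewriter that changes nothing now hands back the visited expression itself instead of re-running the get*Expr folding:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// A do-nothing rewriter: with the identity-preserving visits,
// IdentityRewriter(SE).visit(S) == S holds for every expression S.
struct IdentityRewriter : SCEVRewriteVisitor<IdentityRewriter> {
  IdentityRewriter(ScalarEvolution &SE)
      : SCEVRewriteVisitor<IdentityRewriter>(SE) {}
};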
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 7c6423a21cfa..b73ad95278a0 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -37,42 +37,33 @@
#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
namespace llvm {
-class Instruction;
-class DominatorTree;
class Loop;
class ScalarEvolution;
class SCEV;
-class Value;
-/// TransformKind - Different types of transformations that
-/// TransformForPostIncUse can do.
-enum TransformKind {
- /// Normalize - Normalize according to the given loops.
- Normalize,
- /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
- /// update the given loop set, and normalize.
- NormalizeAutodetect,
- /// Denormalize - Perform the inverse transform on the expression with the
- /// given loop set.
- Denormalize
-};
-
-/// PostIncLoopSet - A set of loops.
typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
-/// TransformForPostIncUse - Transform the given expression according to the
-/// given transformation kind.
-const SCEV *TransformForPostIncUse(TransformKind Kind,
- const SCEV *S,
- Instruction *User,
- Value *OperandValToReplace,
- PostIncLoopSet &Loops,
- ScalarEvolution &SE,
- DominatorTree &DT);
+typedef function_ref<bool(const SCEVAddRecExpr *)> NormalizePredTy;
+
+/// Normalize \p S to be post-increment for all loops present in \p
+/// Loops.
+const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
+ ScalarEvolution &SE);
+
+/// Normalize \p S for all add recurrence sub-expressions for which \p
+/// Pred returns true.
+const SCEV *normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
+ ScalarEvolution &SE);
-}
+/// Denormalize \p S to be post-increment for all loops present in \p
+/// Loops.
+const SCEV *denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
+ ScalarEvolution &SE);
+} // namespace llvm
#endif
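A hypothetical sketch of the slimmed-down interface, including the round-trip check a cautious caller can make before trusting the normalized form:

#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;

// Normalize S to post-increment form for loop L; return nullptr if
// denormalizing does not recover the original expression.
const SCEV *tryNormalize(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
  PostIncLoopSet Loops;
  Loops.insert(L);
  const SCEV *Norm = normalizeForPostIncUse(S, Loops, SE);
  return denormalizeForPostIncUse(Norm, Loops, SE) == S ? Norm : nullptr;
}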
diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def
index 5d5e5b127e63..637fc7ed30dd 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/include/llvm/Analysis/TargetLibraryInfo.def
@@ -20,7 +20,7 @@
// One of TLI_DEFINE_ENUM/STRING are defined.
#if defined(TLI_DEFINE_ENUM)
-#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) enum_variant,
+#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) LibFunc_##enum_variant,
#define TLI_DEFINE_STRING_INTERNAL(string_repr)
#else
#define TLI_DEFINE_ENUM_INTERNAL(enum_variant)
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 8675882431d5..944250cfd6ac 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,14 +30,12 @@ struct VecDesc {
unsigned VectorizationFactor;
};
- namespace LibFunc {
- enum Func {
+ enum LibFunc {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
- NumLibFuncs
- };
- }
+ NumLibFuncs
+ };
/// Implementation of the target library information.
///
@@ -48,9 +46,9 @@ struct VecDesc {
class TargetLibraryInfoImpl {
friend class TargetLibraryInfo;
- unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4];
+ unsigned char AvailableArray[(NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static StringRef const StandardNames[LibFunc::NumLibFuncs];
+ static StringRef const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
@@ -58,11 +56,11 @@ class TargetLibraryInfoImpl {
CustomName = 1,
Unavailable = 0 // (memset to all zeros)
};
- void setState(LibFunc::Func F, AvailabilityState State) {
+ void setState(LibFunc F, AvailabilityState State) {
AvailableArray[F/4] &= ~(3 << 2*(F&3));
AvailableArray[F/4] |= State << 2*(F&3);
}
- AvailabilityState getState(LibFunc::Func F) const {
+ AvailabilityState getState(LibFunc F) const {
return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
}
@@ -74,7 +72,7 @@ class TargetLibraryInfoImpl {
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
- bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc::Func F,
+ bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
const DataLayout *DL) const;
public:
@@ -104,28 +102,28 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(StringRef funcName, LibFunc::Func &F) const;
+ bool getLibFunc(StringRef funcName, LibFunc &F) const;
/// Searches for a particular function name, also checking that its type is
/// valid for the library function matching that name.
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const;
+ bool getLibFunc(const Function &FDecl, LibFunc &F) const;
/// Forces a function to be marked as unavailable.
- void setUnavailable(LibFunc::Func F) {
+ void setUnavailable(LibFunc F) {
setState(F, Unavailable);
}
/// Forces a function to be marked as available.
- void setAvailable(LibFunc::Func F) {
+ void setAvailable(LibFunc F) {
setState(F, StandardName);
}
/// Forces a function to be marked as available and provide an alternate name
/// that must be used.
- void setAvailableWithName(LibFunc::Func F, StringRef Name) {
+ void setAvailableWithName(LibFunc F, StringRef Name) {
if (StandardNames[F] != Name) {
setState(F, CustomName);
CustomNames[F] = Name;
@@ -225,16 +223,16 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(StringRef funcName, LibFunc::Func &F) const {
+ bool getLibFunc(StringRef funcName, LibFunc &F) const {
return Impl->getLibFunc(funcName, F);
}
- bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const {
+ bool getLibFunc(const Function &FDecl, LibFunc &F) const {
return Impl->getLibFunc(FDecl, F);
}
/// Tests whether a library function is available.
- bool has(LibFunc::Func F) const {
+ bool has(LibFunc F) const {
return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable;
}
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
@@ -249,37 +247,37 @@ public:
/// Tests if the function is both available and a candidate for optimized code
/// generation.
- bool hasOptimizedCodeGen(LibFunc::Func F) const {
+ bool hasOptimizedCodeGen(LibFunc F) const {
if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable)
return false;
switch (F) {
default: break;
- case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl:
- case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl:
- case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl:
- case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl:
- case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl:
- case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
- case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl:
- case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl:
- case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
- case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
- case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:
- case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl:
- case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl:
- case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl:
- case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l:
- case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
- case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy:
- case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen:
- case LibFunc::memchr: case LibFunc::mempcpy:
+ case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
+ case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
+ case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
+ case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
+ case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
+ case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl:
+ case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl:
+ case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
+ case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl:
+ case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
+ case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl:
+ case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl:
+ case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
+ case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
+ case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
+ case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy:
+ case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen:
+ case LibFunc_memchr: case LibFunc_mempcpy:
return true;
}
return false;
}
- StringRef getName(LibFunc::Func F) const {
+ StringRef getName(LibFunc F) const {
auto State = Impl->getState(F);
if (State == TargetLibraryInfoImpl::Unavailable)
return StringRef();
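Call sites migrate mechanically from the scoped LibFunc::sqrt spelling to LibFunc_sqrt; a hypothetical sketch:

#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

// Is this callee the standard sqrt, and does the target provide it?
bool isAvailableSqrt(const Function &Callee, const TargetLibraryInfo &TLI) {
  LibFunc F;
  return TLI.getLibFunc(Callee, F) && TLI.has(F) && F == LibFunc_sqrt;
}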
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 209f05c279d0..67196687d556 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -44,23 +44,26 @@ class Value;
/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
- MemIntrinsicInfo()
- : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
- NumMemRefs(0), PtrVal(nullptr) {}
- bool ReadMem;
- bool WriteMem;
- /// True only if this memory operation is non-volatile, non-atomic, and
- /// unordered. (See LoadInst/StoreInst for details on each)
- bool IsSimple;
- // Same Id is set by the target for corresponding load/store intrinsics.
- unsigned short MatchingId;
- int NumMemRefs;
-
/// This is the pointer that the intrinsic is loading from or storing to.
/// If this is non-null, then analysis/optimization passes can assume that
/// this intrinsic is functionally equivalent to a load/store from this
/// pointer.
- Value *PtrVal;
+ Value *PtrVal = nullptr;
+
+ // Ordering for atomic operations.
+ AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
+
+ // Same Id is set by the target for corresponding load/store intrinsics.
+ unsigned short MatchingId = 0;
+
+ bool ReadMem = false;
+ bool WriteMem = false;
+ bool IsVolatile = false;
+
+ bool isUnordered() const {
+ return (Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Unordered) && !IsVolatile;
+ }
};
/// \brief This pass provides access to the codegen interfaces that are needed
@@ -226,6 +229,24 @@ public:
/// starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
+ /// Returns the address space ID for a target's 'flat' address space. Note
+ /// this is not necessarily the same as addrspace(0), which LLVM sometimes
+ /// refers to as the generic address space. The flat address space is a
+ /// generic address space that can be used to access multiple segments of
+ /// memory with different address spaces. Accessing a memory location through
+ /// a pointer with this address space is expected to be legal but slower than
+ /// accessing the same memory location through a pointer with a different
+ /// address space.
+ ///
+ /// This is for targets with different pointer representations which can
+ /// be converted with the addrspacecast instruction. If a pointer is converted
+ /// to this address space, optimizations should attempt to replace the access
+ /// with the source address space.
+ ///
+ /// \returns ~0u if the target does not have such a flat address space to
+ /// optimize away.
+ unsigned getFlatAddressSpace() const;
+
/// \brief Test whether calls to a function lower to actual program function
/// calls.
///
@@ -411,6 +432,16 @@ public:
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) const;
+
+ /// If the target has efficient vector element load/store instructions, it can
+ /// return true here so that insertion/extraction costs are not added to
+ /// the scalarization cost of a load/store.
+ bool supportsEfficientVectorElementLoadStore() const;
+
/// \brief Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
@@ -500,6 +531,12 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
+ /// \return True if \p I should be considered for address type promotion.
+ /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
+ /// profitable without finding other extensions fed by the same input.
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
+
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
@@ -540,8 +577,10 @@ public:
Type *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
- /// zext, etc.
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
+ /// zext, etc. If there is an existing instruction that holds Opcode, it
+ /// may be passed in the 'I' parameter.
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
@@ -552,9 +591,11 @@ public:
/// Phi, Ret, Br.
int getCFInstrCost(unsigned Opcode) const;
- /// \returns The expected cost of compare and select instructions.
+ /// \returns The expected cost of compare and select instructions. If there
+ /// is an existing instruction that holds Opcode, it may be passed in the
+ /// 'I' parameter.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const;
+ Type *CondTy = nullptr, const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
@@ -562,7 +603,7 @@ public:
/// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace, const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -605,13 +646,19 @@ public:
/// ((v0+v2), (v1+v3), undef, undef)
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
- /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// Three cases are handled: (1) a scalar instruction, (2) a vector
+ /// instruction, and (3) a scalar instruction that is to be vectorized
+ /// with vectorization factor VF.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) const;
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1) const;
- /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
+ /// arguments and the return value will be computed based on types.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) const;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
@@ -720,6 +767,7 @@ public:
virtual int getUserCost(const User *U) = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
+ virtual unsigned getFlatAddressSpace() = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
@@ -743,6 +791,11 @@ public:
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+ virtual unsigned
+ getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+ virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) = 0;
+ virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -763,6 +816,8 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+ virtual bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
@@ -776,16 +831,17 @@ public:
ArrayRef<const Value *> Args) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
- virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+ virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) = 0;
virtual int getCFInstrCost(unsigned Opcode) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) = 0;
+ Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) = 0;
+ unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) = 0;
@@ -800,11 +856,10 @@ public:
virtual int getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) = 0;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) = 0;
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -879,6 +934,11 @@ public:
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
+
+ unsigned getFlatAddressSpace() override {
+ return Impl.getFlatAddressSpace();
+ }
+
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
@@ -933,6 +993,19 @@ public:
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) override {
+ return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+ }
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) override {
+ return Impl.getOperandsScalarizationOverhead(Args, VF);
+ }
+
+ bool supportsEfficientVectorElementLoadStore() override {
+ return Impl.supportsEfficientVectorElementLoadStore();
+ }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
@@ -976,7 +1049,11 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
-
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
+ return Impl.shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+ }
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}
@@ -1003,8 +1080,9 @@ public:
Type *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
- return Impl.getCastInstrCost(Opcode, Dst, Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) override {
+ return Impl.getCastInstrCost(Opcode, Dst, Src, I);
}
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index) override {
@@ -1013,15 +1091,16 @@ public:
int getCFInstrCost(unsigned Opcode) override {
return Impl.getCFInstrCost(Opcode);
}
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
- return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) override {
+ return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) override {
- return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned AddressSpace, const Instruction *I) override {
+ return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) override {
@@ -1044,13 +1123,13 @@ public:
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
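Cost-model clients can now hand the concrete instruction to the hooks; a hypothetical sketch:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Cost a cast, letting the target see the instruction so it can recognize
// patterns such as an extension folded into the feeding load.
int castCost(const TargetTransformInfo &TTI, const CastInst *CI) {
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getDestTy(),
                              CI->getSrcTy(), CI);
}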
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index cafc40723c9d..9ab6b7445ab8 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -171,6 +171,10 @@ public:
bool isSourceOfDivergence(const Value *V) { return false; }
+ unsigned getFlatAddressSpace() {
+ return -1;
+ }
+
bool isLoweredToCall(const Function *F) {
// FIXME: These should almost certainly not be handled here, and instead
// handled with the help of TLI or the target itself. This was largely
@@ -251,6 +255,15 @@ public:
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+ return 0;
+ }
+
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) { return 0; }
+
+ bool supportsEfficientVectorElementLoadStore() { return false; }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
bool enableInterleavedAccessVectorization() { return false; }
@@ -292,6 +305,13 @@ public:
unsigned getRegisterBitWidth(bool Vector) { return 32; }
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader) {
+ AllowPromotionWithoutCommonHeader = false;
+ return false;
+ }
+
unsigned getCacheLineSize() { return 0; }
unsigned getPrefetchDistance() { return 0; }
@@ -316,7 +336,8 @@ public:
return 1;
}
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return 1; }
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) { return 1; }
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) {
@@ -325,7 +346,8 @@ public:
unsigned getCFInstrCost(unsigned Opcode) { return 1; }
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
return 1;
}
@@ -334,7 +356,7 @@ public:
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
return 1;
}
@@ -358,11 +380,12 @@ public:
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
return 1;
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
return 1;
}
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index c3f688f5d7f1..17906ba4e392 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -32,14 +32,15 @@ struct DevirtCallSite {
/// call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, CallInst *CI);
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI);
/// Given a call to the intrinsic @llvm.type.checked.load, find all
/// devirtualizable call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
- SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI);
+ SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
+ const CallInst *CI);
}
#endif
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index aaf6f888e06f..e3c2f3bed227 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -31,6 +31,7 @@ template <typename T> class ArrayRef;
class Instruction;
class Loop;
class LoopInfo;
+ class OptimizationRemarkEmitter;
class MDNode;
class StringRef;
class TargetLibraryInfo;
@@ -52,7 +53,8 @@ template <typename T> class ArrayRef;
const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
/// \p KnownOne the set of bits that are known to be one
@@ -86,8 +88,10 @@ template <typename T> class ArrayRef;
/// Return true if the given value is known to be non-zero when defined. For
/// vectors, return true if every element is known to be non-zero when
- /// defined. Supports values with integer or pointer type and vectors of
- /// integers.
+ /// defined. For pointers, if the context instruction and dominator tree are
+ /// specified, perform context-sensitive analysis and return true if the
+ /// pointer couldn't possibly be null at the specified instruction.
+ /// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -167,13 +171,25 @@ template <typename T> class ArrayRef;
bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth = 0);
- /// Return true if we can prove that the specified FP value is either a NaN or
- /// never less than 0.0.
- /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0.
+ /// Return true if we can prove that the specified FP value is either NaN or
+ /// never less than -0.0.
+ ///
+ /// NaN --> true
+ /// +0 --> true
+ /// -0 --> true
+ /// x > +0 --> true
+ /// x < -0 --> false
+ ///
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
- /// \returns true if we can prove that the specified FP value has a 0 sign
- /// bit.
+ /// Return true if we can prove that the specified FP value's sign bit is 0.
+ ///
+ /// NaN --> true/false (depending on the NaN's sign bit)
+ /// +0 --> true
+ /// -0 --> false
+ /// x > +0 --> true
+ /// x < -0 --> false
+ ///
bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
/// If the specified value can be set by repeating the same byte in memory,
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index eaa068b89c77..6315e8408f05 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -1,4 +1,4 @@
-//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
+//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
-#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+#ifndef LLVM_ANALYSIS_VECTORUTILS_H
+#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/IRBuilder.h"
namespace llvm {
@@ -123,6 +124,58 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+/// \brief Create an interleave shuffle mask.
+///
+/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
+/// vectorization factor \p VF into a single wide vector. The mask is of the
+/// form:
+///
+/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
+///
+/// For example, the mask for VF = 4 and NumVecs = 2 is:
+///
+/// <0, 4, 1, 5, 2, 6, 3, 7>.
+Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
+ unsigned NumVecs);
+
+/// \brief Create a stride shuffle mask.
+///
+/// This function creates a shuffle mask whose elements begin at \p Start and
+/// are incremented by \p Stride. The mask can be used to deinterleave an
+/// interleaved vector into separate vectors of vectorization factor \p VF. The
+/// mask is of the form:
+///
+/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
+///
+/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
+///
+/// <0, 2, 4, 6>
+Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned Stride, unsigned VF);
+
+/// \brief Create a sequential shuffle mask.
+///
+/// This function creates a shuffle mask whose elements are sequential and begin
+/// at \p Start. The mask contains \p NumInts integers and is padded with \p
+/// NumUndefs undef values. The mask is of the form:
+///
+/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
+///
+/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
+///
+/// <0, 1, 2, 3, undef, undef, undef, undef>
+Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumInts, unsigned NumUndefs);
+
+/// \brief Concatenate a list of vectors.
+///
+/// This function generates code that concatenates the vectors in \p Vecs into a
+/// single large vector. The number of vectors should be greater than one, and
+/// their element types should be the same. The number of elements in the
+/// vectors should also be the same; however, if the last vector has fewer
+/// elements, it will be padded with undefs.
+Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+
} // llvm namespace
#endif
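Tying the new helpers together, a hypothetical sketch that builds the masks from the examples above:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Masks for interleaving two 4-element vectors and for pulling the even
// lanes back out of the interleaved result.
void buildExampleMasks(IRBuilder<> &Builder) {
  Constant *Interleave = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
  // <0, 4, 1, 5, 2, 6, 3, 7>
  Constant *EvenLanes =
      createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
  // <0, 2, 4, 6>
  (void)Interleave;
  (void)EvenLanes;
}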